In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score 


In [5]:
%pip install scikit-learn


Collecting scikit-learn
  Downloading scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting scipy>=1.6.0 (from scikit-learn)
  Downloading scipy-1.14.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.9/12.9 MB[0m [31m28.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading scipy-1.14.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (40.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.8/40.8 MB[0m [31m36.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading threadpoolctl-3.5.0-py3-none-any.whl (18 kB)
Installing collected packages: threadpoolctl, scipy, scikit-lea

In [3]:
data = {
    "Description": [
        "A high school girl with a secret beauty, navigating love and friendships.",
        "A warrior battling against a dark force to protect his kingdom.",
        "A young man with magical powers discovers his destiny.", 
        "A detective solving complex mysteries in a futuristic city.", 
        "A group of friends on a thrilling adventure through a mystical forest.",
        "Romance drama with fake relationship trope.",
        "A fast-paced action series about supernatural powers and self-acceptance",
        'Fantasy adventure with magical creatures',
        'Action-packed fantasy about a hero who saves the world',
        'A romantic comedy about a high school girl who becomes popular',
        "Two warriors from rival clans must team up to defeat a common enemy.",
        "A detective tries to solve the mystery behind a series of supernatural events.",
        "A girl is chosen by an ancient prophecy to save her kingdom from an evil sorcerer.",
        "A woman wakes up in a locked room with no memory of how she got there, and must find a way out."
    ],
    "Category": ["Romance", "Action", "Fantasy", "Mystery", "Adventure", 
        "Romance", "Action", "Fantasy", "Fantasy", "Romance", 
        "Action", "Mystery", "Fantasy", "Mystery"]
}
df = pd.DataFrame(data)

In [4]:
df.head

<bound method NDFrame.head of                                           Description   Category
0   A high school girl with a secret beauty, navig...    Romance
1   A warrior battling against a dark force to pro...     Action
2   A young man with magical powers discovers his ...    Fantasy
3   A detective solving complex mysteries in a fut...    Mystery
4   A group of friends on a thrilling adventure th...  Adventure
5         Romance drama with fake relationship trope.    Romance
6   A fast-paced action series about supernatural ...     Action
7            Fantasy adventure with magical creatures    Fantasy
8   Action-packed fantasy about a hero who saves t...    Fantasy
9   A romantic comedy about a high school girl who...    Romance
10  Two warriors from rival clans must team up to ...     Action
11  A detective tries to solve the mystery behind ...    Mystery
12  A girl is chosen by an ancient prophecy to sav...    Fantasy
13  A woman wakes up in a locked room with no memo...    Mys

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14 entries, 0 to 13
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Description  14 non-null     object
 1   Category     14 non-null     object
dtypes: object(2)
memory usage: 356.0+ bytes


In [7]:
df.describe()

Unnamed: 0,Description,Category
count,14,14
unique,14,5
top,"A high school girl with a secret beauty, navig...",Fantasy
freq,1,4


### Data Preprocessing

In [8]:
X = df["Description"]
y = df["Category"]

### Split data into training and testing sets

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [11]:
# Feature Engineering - Bag-of-Words
vectorizer = CountVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)


In [12]:
# Decision Tree
model = DecisionTreeClassifier()
model.fit(X_train_vectorized, y_train) 

In [13]:
# Prediction
y_pred = model.predict(X_test_vectorized)

In [14]:
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.0


In [16]:
new_description = "A girl is chosen by an ancient prophecy to save her kingdom from an evil sorcerer."
new_description_vectorized = vectorizer.transform([new_description])
predicted_category = model.predict(new_description_vectorized)[0]
print("Predicted Category:", predicted_category)

Predicted Category: Fantasy
