In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
# NOTE(review): `x` and `y` are not defined anywhere in the visible notebook;
# they must be created by an earlier cell for this one to run on a fresh kernel.

# Hold out 20% of the rows for evaluation. The seed is pinned to 42, the same
# value the other splits/models in this notebook use, so that Restart & Run All
# reproduces the exact same partition (and therefore the same metrics).
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Default decision tree; random_state fixes the estimator's internal randomness
# so the fitted tree is identical across runs.
dtree = DecisionTreeClassifier(random_state=42)
dtree.fit(x_train, y_train)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# ----- 1. Load dataset -----
# Expects a CSV with columns 'tweet' and 'emotion' (emotion in {"anger","fear",...}).
# Rows missing either column are dropped straight after loading.
df = pd.read_csv("twitter_emotion.csv").dropna(subset=["tweet", "emotion"])

# ----- 2. Train-test split -----
# Text is the feature, emotion is the label; 80/20 split, seeded, and
# stratified on the label.
X, y = df["tweet"], df["emotion"]
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# ----- 3. TF-IDF vectorization -----
# The vectorizer is fitted on the training tweets only and then applied
# unchanged to the test tweets.
tfidf = TfidfVectorizer(
    max_features=5000,
    ngram_range=(1, 2),
    stop_words="english",
)
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

# ----- 4. Model training -----
# Registry of display name -> unfitted estimator; each is trained and scored in turn.
models = dict(
    LogisticRegression=LogisticRegression(max_iter=500),
    RandomForest=RandomForestClassifier(n_estimators=100, random_state=42),
)

for name in models:
    model = models[name]
    # fit() returns the estimator itself, so prediction can be chained onto it
    preds = model.fit(X_train_tfidf, y_train).predict(X_test_tfidf)
    print(f"\n--- {name} Performance ---")
    print("Accuracy:", accuracy_score(y_test, preds))
    print(classification_report(y_test, preds))


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# NOTE(review): in top-to-bottom order the tweet-emotion cell rebinds
# `y_train` / `y_test` between the cell that fits `dtree` and this one, so the
# labels read here may not be the ones `dtree` was trained on - confirm the
# intended execution order.
# NOTE(review): average='binary' is only valid for two-class targets - confirm
# that the labels used with `dtree` are binary.

# Predictions of the fitted tree on both partitions
y_pred_train, y_pred_test = dtree.predict(x_train), dtree.predict(x_test)

# The same three scorers are applied to each partition, in this order
_metrics = (precision_score, recall_score, f1_score)

# Precision, Recall, and F1 for train data
precision_train, recall_train, f1_train = [
    metric(y_train, y_pred_train, average='binary') for metric in _metrics
]

# Precision, Recall, and F1 for test data
precision_test, recall_test, f1_test = [
    metric(y_test, y_pred_test, average='binary') for metric in _metrics
]

# Report train scores first, then test scores
for label, score in (
    ("Train Precision:", precision_train),
    ("Train Recall:", recall_train),
    ("Train F1-score:", f1_train),
    ("Test Precision:", precision_test),
    ("Test Recall:", recall_test),
    ("Test F1-score:", f1_test),
):
    print(label, score)

In [None]:
# Everything this cell needs is imported here: KNeighborsClassifier was never
# imported in the notebook and confusion_matrix was only imported by a later
# cell, so the cell raised NameError under Restart & Run All.
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# Define KNN
knn = KNeighborsClassifier()

# Hyperparameter grid
param_grid = {
    "n_neighbors": range(1, 21),          # try k=1 to 20
    "weights": ["uniform", "distance"],    # uniform = all neighbors equal, distance = weighted by distance
    "metric": ["euclidean", "manhattan"]   # distance metric
}

# GridSearchCV: exhaustive search over the grid with 5-fold CV, scored by
# accuracy, using all available cores.
grid = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    cv=5,
    scoring="accuracy",
    n_jobs=-1
)

# At this point in the notebook X_train / X_test are the raw tweet strings from
# the tweet-emotion cell, which KNN cannot consume. Fit on the TF-IDF matrices
# built from them instead; y_train / y_test are the matching emotion labels.
grid.fit(X_train_tfidf, y_train)

# Best hyperparameters
print("Best Parameters:", grid.best_params_)
print("Best CV Accuracy:", grid.best_score_)

# Test set performance (grid.predict uses the refitted best estimator)
y_pred = grid.predict(X_test_tfidf)
print("Test Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

 

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Iris features and labels as arrays, then a seeded 80/20 hold-out split
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Seeded base tree; the search below clones it for every candidate
dt = DecisionTreeClassifier(random_state=42)

# Candidate values for each tuned hyperparameter (4 x 3 x 2 combinations)
param_grid = dict(
    max_depth=[2, 4, 6, None],
    min_samples_split=[2, 5, 10],
    criterion=["gini", "entropy"],
)

# Exhaustive 5-fold search scored by accuracy, parallelised over all cores
grid = GridSearchCV(
    estimator=dt, param_grid=param_grid, cv=5, scoring="accuracy", n_jobs=-1
)
grid.fit(X_train, y_train)

# Winning combination and its mean cross-validated accuracy
print("Best Parameters (GridSearch):", grid.best_params_)
print("Best CV Accuracy:", grid.best_score_)

# Held-out evaluation with the refitted best estimator
y_pred = grid.predict(X_test)
print("Test Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


In [None]:
from sklearn.model_selection import RandomizedSearchCV
import numpy as np

# Reuses `dt`, the Iris train/test split and the metric functions defined by
# the grid-search cell above.

# Values each hyperparameter may be sampled from
param_dist = dict(
    max_depth=[2, 4, 6, None],
    min_samples_split=[2, 5, 10, 15],
    criterion=["gini", "entropy"],
)

# Randomized search: 10 sampled combinations, 5-fold CV, accuracy scoring,
# seeded sampling, all cores
random_search = RandomizedSearchCV(
    estimator=dt,
    param_distributions=param_dist,
    n_iter=10,
    cv=5,
    scoring="accuracy",
    random_state=42,
    n_jobs=-1,
)
random_search.fit(X_train, y_train)

# Winning combination and its mean cross-validated accuracy
print("Best Parameters (RandomSearch):", random_search.best_params_)
print("Best CV Accuracy:", random_search.best_score_)

# Held-out evaluation with the refitted best estimator
y_pred = random_search.predict(X_test)
print("Test Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
