In [1]:
import pandas as pd
from sklearn.model_selection import GridSearchCV, cross_validate
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split
from catboost import CatBoostClassifier

# Read the data into a dataframe
df = pd.read_csv("cleaned_data.csv")

# Separate the features and target variable
X = df[
    [
        "total_direct_mentions",
        "total_indirect_mentions",
        "total_likes",
        "total_retweets",
        "total_project_followers",
        "total_indirect_followers",
        "total_positive_direct_mentions",
        "total_negative_direct_mentions",
        "total_positive_indirect_mentions",
        "total_negative_indirect_mentions",
        "soft_cap",
    ]
]
y = df["ico_success"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)



# Perform Random Oversampling
ros = RandomOverSampler(random_state=42)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

# Define the parameter grids for grid search
catboost_param_grid = {
        'iterations': [ 1000],
        'learning_rate': [0.03 ],
        'depth': [ 10]
    },


# Define the models
models = {
    'CatBoost': (CatBoostClassifier(), catboost_param_grid),
 
}
result = {
    "Model": [],
    "Best Parameters": [],
    "Average Accuracy": [],
    "Average Precision": [],
    "Average Recall": [],
    "Average F1 Score": [],
}

best_estimators = {}

# Perform grid search and cross-validation for each model
for model_name, (model, param_grid) in models.items():
    print(f"Model: {model_name}")
    print("=" * 50)
    result["Model"].append(model_name)

    # Perform grid search with 5-fold cross-validation
    grid_search = GridSearchCV(
        estimator=model, param_grid=param_grid, cv=5, scoring="accuracy"
    )
    grid_search.fit(X_resampled, y_resampled)

    # Print the best parameters and the corresponding score
    print("Best Parameters: ", grid_search.best_params_)
    print()

    result["Best Parameters"].append(grid_search.best_params_)

    best_estimators[model_name] = grid_search.best_estimator_
best_estimators


result

Model: CatBoost
0:	learn: 0.6800058	total: 195ms	remaining: 3m 15s
1:	learn: 0.6660809	total: 252ms	remaining: 2m 5s
2:	learn: 0.6525236	total: 325ms	remaining: 1m 47s
3:	learn: 0.6390647	total: 391ms	remaining: 1m 37s
4:	learn: 0.6291665	total: 453ms	remaining: 1m 30s
5:	learn: 0.6169147	total: 511ms	remaining: 1m 24s
6:	learn: 0.6071101	total: 564ms	remaining: 1m 19s
7:	learn: 0.5975885	total: 618ms	remaining: 1m 16s
8:	learn: 0.5874806	total: 668ms	remaining: 1m 13s
9:	learn: 0.5797456	total: 727ms	remaining: 1m 11s
10:	learn: 0.5705729	total: 790ms	remaining: 1m 11s
11:	learn: 0.5626615	total: 845ms	remaining: 1m 9s
12:	learn: 0.5565822	total: 910ms	remaining: 1m 9s
13:	learn: 0.5478384	total: 978ms	remaining: 1m 8s
14:	learn: 0.5398451	total: 1.03s	remaining: 1m 7s
15:	learn: 0.5329451	total: 1.09s	remaining: 1m 6s
16:	learn: 0.5251009	total: 1.15s	remaining: 1m 6s
17:	learn: 0.5187407	total: 1.21s	remaining: 1m 6s
18:	learn: 0.5109565	total: 1.27s	remaining: 1m 5s
19:	learn: 0.50

{'Model': ['CatBoost'],
 'Best Parameters': [{'depth': 10, 'iterations': 1000, 'learning_rate': 0.03}],
 'Average Accuracy': [],
 'Average Precision': [],
 'Average Recall': [],
 'Average F1 Score': []}

In [2]:
%%time
from sklearn.metrics import accuracy_score, precision_score, f1_score, recall_score

result = {
    "Model": [],
    "Accuracy": [],
    "Precision": [],
    "Recall": [],
    "F1 Score": [],
}

for model_name in best_estimators:
    model = best_estimators[model_name]
    print(f"Model: {model_name}")
    print("=" * 50)
    result["Model"].append(model_name)
    
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f_measure = f1_score(y_test, y_pred)

    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F-measure:", f_measure)

    result["Accuracy"].append(accuracy)
    result["Precision"].append(precision)
    result["Recall"].append(recall)
    result["F1 Score"].append(f_measure)

Model: CatBoost
Accuracy: 0.7804878048780488
Precision: 0.7709923664122137
Recall: 0.9439252336448598
F-measure: 0.8487394957983193
Wall time: 11.9 ms
