# In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import f1_score
import lightgbm as lgb

# Read the labelled training set and the unlabelled test set (train file first).
train_data, test_data = (pd.read_csv(path) for path in ("train.csv", "test.csv"))

# Target column used for fitting and for the validation split.
train_label = train_data['label']

# Bag-of-words counts with English stop words removed. The vocabulary is
# learned from the training text only and then applied unchanged to the test
# text; both matrices are cast to float64.
vectorizer = CountVectorizer(stop_words='english')
train_feature = (
    vectorizer
    .fit_transform(train_data['text'])
    .astype(np.float64)
)
test_feature = (
    vectorizer
    .transform(test_data['text'])
    .astype(np.float64)
)

# Hold out a fraction of the labelled rows for validation; the fixed seed
# makes the split reproducible.
val_size = 0.2
X_train, X_val, y_train, y_val = train_test_split(
    train_feature,
    train_label,
    test_size=val_size,
    random_state=42,
)

# Base estimator; every hyperparameter not listed in the grid keeps its default.
lgb_model = lgb.LGBMClassifier()

# Candidate values per hyperparameter: 3 values for each of 4 parameters gives
# 81 combinations.
param_grid = dict(
    num_leaves=[31, 63, 127],
    max_depth=[-1, 8, 16],
    learning_rate=[0.05, 0.1, 0.2],
    n_estimators=[100, 500, 1000],
)

# Scorer name handed to GridSearchCV to rank the candidates.
eval_metric = 'f1_macro'

# Exhaustive 5-fold cross-validated search, fitted on the training split only
# so the validation rows stay out of model selection.
grid_search = GridSearchCV(
    lgb_model,
    param_grid,
    scoring=eval_metric,
    cv=5,
)
grid_search.fit(X_train, y_train)

# Score the validation split with the grid-search winner. GridSearchCV refits
# the best configuration on the data passed to fit() (X_train only; refit=True
# is its default), so the validation rows are genuinely unseen here. Scoring a
# model that was trained on the full training set would leak those rows into
# training and inflate the reported F1.
y_val_pred = grid_search.best_estimator_.predict(X_val)

# calculate the macro-averaged f1 score on the validation set
f1_macro = f1_score(y_val, y_val_pred, average='macro')
print("F1 score on validation set:", f1_macro)

# Now that the validation score is recorded, retrain with the best
# hyperparameters on every labelled row (training + validation) so the model
# used for the submission sees all available data.
best_lgb_model = lgb.LGBMClassifier(**grid_search.best_params_)
best_lgb_model.fit(train_feature, train_label)

# make predictions on the test set
y_test_pred = best_lgb_model.predict(test_feature)

# save the predictions to a CSV file (one row per test id, no index column)
submission = pd.DataFrame({'id': test_data['id'], 'label': y_test_pred})
submission.to_csv('submission.csv', index=False)
