In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.feature_selection import RFE

In [3]:
# Step 1: Load Preprocessed Data
# Both CSV paths are relative, so they resolve against the working directory.
train_df, test_df = pd.read_csv('train.csv'), pd.read_csv('test.csv')

In [4]:
# Step 2: Prepare Data
# Training split: the 'Sequence' column is the model input, 'Label' the target.
X_train, y_train = train_df['Sequence'], train_df['Label']
# Test split: only 'Sequence' and 'ID' are read; no label column is used here.
X_test, test_ids = test_df['Sequence'], test_df['ID']

In [5]:
# Step 3: Feature Extraction (Using TF-IDF as an example)
# The vectorizer is fitted on the training sequences only; the test sequences
# are transformed with that already-fitted vectorizer, never re-fitted.
# NOTE(review): the vectorizer is built with all-default settings -- confirm
# that the default tokenization suits the contents of the 'Sequence' column.
tfidf_vectorizer = TfidfVectorizer()
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

In [6]:
# Step 4: Apply Recursive Feature Elimination (RFE)
# A linear-kernel SVC is handed to RFE as the estimator driving the selection.
svm_model = SVC(kernel='linear')
rfe = RFE(
    estimator=svm_model,
    n_features_to_select=100,  # Adjust the number of features as needed
)
# Fit the selector on the training matrix and labels, then apply the same
# fitted selector to both the training and the test TF-IDF matrices.
rfe.fit(X_train_tfidf, y_train)
X_train_rfe = rfe.transform(X_train_tfidf)
X_test_rfe = rfe.transform(X_test_tfidf)

In [15]:
# Step 5: Train a Machine Learning Model
# Step 6: Make Predictions on Test Data
# The SVC is fitted on the RFE-reduced training features and immediately used
# to predict on the RFE-reduced test features (fit() hands back the estimator).
y_test_pred = svm_model.fit(X_train_rfe, y_train).predict(X_test_rfe)

# Step 7: Create a DataFrame for Predictions
# Start from the test IDs and attach the predictions as a 'label' column.
result_df = pd.DataFrame({'ID': test_ids}).assign(label=y_test_pred)

# Step 8: Save Predictions to 'result2.csv'
# index=False keeps the DataFrame index out of the written file.
result_df.to_csv('result2.csv', index=False)


