<a href="https://colab.research.google.com/github/ZerXXX0/sales-prediction/blob/main/MLQ_SVM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Load the training and test tables from the project repository.
train_df = pd.read_csv('https://raw.githubusercontent.com/ZerXXX0/sales-prediction/refs/heads/main/dataset/train_final.csv')
test_df = pd.read_csv('https://raw.githubusercontent.com/ZerXXX0/sales-prediction/refs/heads/main/dataset/test_final.csv')

# Columns excluded from the feature matrix (the same list is reused for the test set).
drop_cols = ['Unnamed: 0', 'TransactionID', 'MemberID']

# Prepare training data: X holds the features, y the 'next_buy' target.
train_df_clean = train_df.drop(columns=drop_cols)
X = train_df_clean.drop(columns=['next_buy'])
y = train_df_clean['next_buy']

# Save column order so the test features can be aligned to it later.
train_cols = X.columns.tolist()

# Split BEFORE fitting any preprocessing. Fitting the imputer/scaler on the
# full matrix would let validation rows influence the imputation means and
# the scaling statistics (data leakage), inflating the validation score.
# The split parameters are unchanged, so the row partition is the same.
X_train_raw, X_val_raw, y_train, y_val = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Learn imputation means and scaling statistics from the training rows only.
imputer = SimpleImputer(strategy='mean')
scaler = StandardScaler()
X_train = scaler.fit_transform(imputer.fit_transform(X_train_raw))

# Validation rows are only transformed with the statistics learned above.
X_val = scaler.transform(imputer.transform(X_val_raw))

# Full-matrix versions kept under their original names for any cell that
# still references them; they are produced by transform only (no refitting).
X_imputed = imputer.transform(X)
X_scaled = scaler.transform(X_imputed)


In [None]:
# RBF-kernel SVM; class_weight='balanced' reweights classes inversely to
# their frequency in y_train.
svm_params = {
    'kernel': 'rbf',
    'C': 1.0,
    'gamma': 'scale',
    'class_weight': 'balanced',
}
clf = SVC(**svm_params)
clf.fit(X_train, y_train)

# Score the fitted model on the held-out validation split.
y_pred = clf.predict(X_val)
val_acc = accuracy_score(y_true=y_val, y_pred=y_pred)
print(f"Validation Accuracy: {val_acc * 100:.2f}%")


In [None]:
# Build the test feature matrix: drop the excluded columns, then select the
# training columns so the order matches what the imputer/scaler were fitted on.
test_df_clean = test_df.drop(columns=drop_cols)[train_cols]

# Reuse the already-fitted imputer and scaler (transform only, no refitting).
X_test_imputed = imputer.transform(test_df_clean)
X_test_scaled = scaler.transform(X_test_imputed)

# Predict one label per test row.
test_preds = clf.predict(X_test_scaled)

# Pair each prediction with the MemberID of the row it came from and save.
member_ids = test_df["MemberID"]
submission = pd.DataFrame({"MemberID": member_ids, "next_buy": test_preds})
submission.to_csv("svm_submission.csv", index=False)

In [None]:
# Reload the saved per-row predictions from disk and print a summary of
# their columns, dtypes and non-null counts.
saved_predictions_path = "svm_submission.csv"
submission_df = pd.read_csv(saved_predictions_path)
submission_df.info()

In [None]:
# Load the sample submission; its MemberID column determines the rows of the
# final output.
sample_submission_url = "https://raw.githubusercontent.com/ZerXXX0/sales-prediction/refs/heads/main/dataset/sample_submission.csv"
submit_df = pd.read_csv(sample_submission_url)

# A MemberID can appear on several prediction rows. Flag every occurrence
# except the last one and discard the flagged rows, so each member is left
# with exactly one prediction.
is_earlier_duplicate = submission_df.duplicated(subset=['MemberID'], keep='last')
submission_df_unique = submission_df[~is_earlier_duplicate]

# MemberID -> next_buy lookup; the index is unique after de-duplication,
# which Series.map requires.
next_buy_lookup = submission_df_unique.set_index('MemberID')['next_buy']

# Attach a prediction to every sample-submission row.
# NOTE(review): MemberIDs missing from the lookup map to NaN — confirm that
# every sample-submission member has a prediction.
submit_df['next_buy'] = submit_df['MemberID'].map(next_buy_lookup)

submit_df.info()

In [None]:
# Write the final submission to disk without the DataFrame index column.
final_submission_path = 'submission_SVM.csv'
submit_df.to_csv(final_submission_path, index=False)