In [692]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier
from xgboost import XGBClassifier
import lightgbm as lgb

In [693]:
# Load the training CSV (path is relative to the working directory) into a DataFrame.
train_df = pd.read_csv(filepath_or_buffer='train_wn75k28.csv')

In [696]:
# Parse both timestamp columns to datetime so the .dt accessor can be used below.
date_columns = ['created_at', 'signup_date']
parsed_dates = train_df[date_columns].apply(pd.to_datetime)
train_df[date_columns] = parsed_dates

##### Dealing with null values in products_purchased

In [698]:
# Most frequent products_purchased value; .mode() returns a Series, so take its first entry.
purchase_modes = train_df['products_purchased'].mode()
mode_value = purchase_modes[0]

In [699]:
# Display the value that will be used to fill missing products_purchased entries.
mode_value

2.0

In [700]:
# Fill missing purchase counts with the most frequent value.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on the selected Series: that chained in-place form
# emits a FutureWarning in pandas 2.x and leaves the DataFrame unchanged
# once copy-on-write is active.
train_df['products_purchased'] = train_df['products_purchased'].fillna(mode_value)

##### Dealing with Null values in signup date

In [702]:
# Distinct calendar dates present in signup_date, as a plain Python list.
signup_days = train_df['signup_date'].dt.date
signup_data_list = signup_days.unique().tolist()

In [704]:
# Distinct calendar dates present in created_at, as a plain Python list.
created_days = train_df['created_at'].dt.date
created_at_list = created_days.unique().tolist()

In [706]:
# Where signup_date is missing, fall back to the row's created_at timestamp
# (fillna with a Series aligns on the index, so each row gets its own value).
# Assigned back rather than using fillna(inplace=True) on the selected column,
# which warns in pandas 2.x and has no effect on the frame under copy-on-write.
train_df['signup_date'] = train_df['signup_date'].fillna(train_df['created_at'])

##### Splitting created_at and signup_date into date, month and year columns

In [709]:
# Expand each timestamp column into numeric day / month / year feature columns.
# Note the "<column>_date" feature holds the day of the month.
for source_col in ('created_at', 'signup_date'):
    stamps = train_df[source_col].dt
    train_df[f'{source_col}_date'] = stamps.day
    train_df[f'{source_col}_month'] = stamps.month
    train_df[f'{source_col}_year'] = stamps.year

##### Dropping the created_at and signup_date columns

In [711]:
# The raw timestamps are now represented by the day/month/year columns; remove them in place.
train_df.drop(['created_at', 'signup_date'], axis='columns', inplace=True)

In [713]:
# Use 'id' as the row index so it is not treated as a feature
# (drop=True is the default, so the column is removed from the frame).
train_df.set_index(keys='id', inplace=True)

In [715]:
# Snapshot of the training frame's column names as a Python list.
train_df_collist = train_df.columns.tolist()

In [717]:
# products_purchased no longer contains NaN after imputation, so store it as 64-bit integers.
purchases_as_int = train_df['products_purchased'].astype('int64')
train_df['products_purchased'] = purchases_as_int

##### Separating Feature and Target Columns

In [719]:
# Target is the 'buy' column; every other column is a model feature.
y = train_df['buy']
X = train_df.drop('buy', axis='columns')

In [720]:


# Keep the unscaled feature DataFrame (with its column names) under X_feature,
# then replace X with its standardised version.
# NOTE(review): the scaler is fitted on all rows before the hold-out split
# below, so the validation rows contribute to the scaling statistics.
X_feature = X
scaler = StandardScaler()
X = scaler.fit(X_feature).transform(X_feature)

In [721]:
# Hold out 25% of the rows for validation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=.25,
    random_state=0,
)

##### Creating Models

###### AdaBoost

In [722]:
# AdaBoost classifier: fit on the training split, then score on the hold-out split.
# random_state is fixed so the reported metrics can be reproduced on a re-run,
# matching the seeded train/validation split.
# NOTE(review): algorithm='SAMME.R' is deprecated in recent scikit-learn
# releases - confirm against the installed version.
model_AB = AdaBoostClassifier(
    n_estimators=400,
    learning_rate=1.02,
    algorithm='SAMME.R',
    random_state=0,
)
model_AB.fit(X_train, y_train)
y_pred_AB = model_AB.predict(X_test)

# Accuracy alone is misleading when classes are imbalanced; the per-class
# report shows precision/recall for each label.
print("Accuracy: ",accuracy_score(y_test,y_pred_AB))
print("classification Report\n",classification_report(y_test,y_pred_AB))

Accuracy:  0.9746706158717189
classification Report
               precision    recall  f1-score   support

           0       0.98      1.00      0.99      9284
           1       0.95      0.54      0.69       507

    accuracy                           0.97      9791
   macro avg       0.96      0.77      0.84      9791
weighted avg       0.97      0.97      0.97      9791



In [723]:
# f1_score is not among the names imported in the notebook's import cell, so
# import it here; without this the cell raises NameError on a fresh kernel.
from sklearn.metrics import f1_score

# F1 of the positive class on the hold-out split.
f1_score(y_test, y_pred_AB)

0.6892230576441102

###### Testing on test data

In [724]:
# Load the test CSV (path is relative to the working directory) into a DataFrame.
test_df = pd.read_csv(filepath_or_buffer='test_Wf7sxXF.csv')

In [726]:
# Most frequent products_purchased value in the test frame (overwrites the earlier mode_value).
test_purchase_modes = test_df['products_purchased'].mode()
mode_value = test_purchase_modes[0]

In [727]:
# Display the fill value computed from the test frame's products_purchased column.
mode_value

2.0

In [728]:
# Fill missing purchase counts in the test frame with mode_value.
# Assigned back to the column instead of fillna(inplace=True) on the selected
# Series: the chained in-place form emits a FutureWarning in pandas 2.x and
# does not modify the DataFrame once copy-on-write is active.
test_df['products_purchased'] = test_df['products_purchased'].fillna(mode_value)

In [729]:
# Parse both timestamp columns of the test frame to datetime.
test_date_columns = ['created_at', 'signup_date']
test_parsed_dates = test_df[test_date_columns].apply(pd.to_datetime)
test_df[test_date_columns] = test_parsed_dates

##### Dealing with Null values in signup date

In [730]:
# Distinct calendar dates present in the test frame's signup_date column.
test_signup_days = test_df['signup_date'].dt.date
signup_data_list = test_signup_days.unique().tolist()

In [731]:
# Distinct calendar dates present in the test frame's created_at column.
test_created_days = test_df['created_at'].dt.date
created_at_list = test_created_days.unique().tolist()

In [732]:
# Where signup_date is missing in the test frame, fall back to that row's
# created_at timestamp (the fill Series is aligned on the index).
# Assigned back rather than using fillna(inplace=True) on the selected column,
# which warns in pandas 2.x and has no effect on the frame under copy-on-write.
test_df['signup_date'] = test_df['signup_date'].fillna(test_df['created_at'])

##### Splitting created_at and signup_date into date, month and year columns

In [733]:
# Expand each timestamp column of the test frame into day / month / year columns,
# using the same column names and order as the training frame.
for source_col in ('created_at', 'signup_date'):
    stamps = test_df[source_col].dt
    test_df[f'{source_col}_date'] = stamps.day
    test_df[f'{source_col}_month'] = stamps.month
    test_df[f'{source_col}_year'] = stamps.year

##### Dropping the created_at and signup_date columns

In [734]:
# Remove the raw timestamp columns from the test frame in place.
test_df.drop(['created_at', 'signup_date'], axis='columns', inplace=True)

In [735]:
# Use 'id' as the row index of the test frame (drop=True is the default),
# so it is kept for the submission but not passed to the model as a feature.
test_df.set_index(keys='id', inplace=True)

In [736]:
# Store the test frame's column names under their own name so the training
# column list (train_df_collist) is not overwritten with test columns.
test_df_collist = test_df.columns.to_list()

In [738]:
# Store the imputed products_purchased column of the test frame as 64-bit integers.
test_purchases_as_int = test_df['products_purchased'].astype('int64')
test_df['products_purchased'] = test_purchases_as_int

In [740]:
#df1.index.equals(df2.index)

##### Scaling the test features

In [741]:


# Standardise the test features with the scaler that was fitted on the training
# features. Re-fitting a new StandardScaler on the test frame would scale the
# test rows with different means/variances than the ones the model was trained
# on, so the learned split thresholds would no longer line up.
# Columns are selected in the training feature order (X_feature is the unscaled
# training feature DataFrame) so each column gets its matching statistics.
X_testdata = scaler.transform(test_df[X_feature.columns])

In [742]:
# AdaBoost: predict the 'buy' label for every row of the scaled test features.
# This reuses the name y_pred_AB, replacing the hold-out predictions.
y_pred_AB = model_AB.predict(X=X_testdata)


In [743]:
# Display the array of predicted labels for the test rows.
y_pred_AB

array([1, 0, 0, ..., 0, 0, 0], dtype=int64)

In [745]:
# Start the submission frame with no columns, indexed by the test frame's index.
submit_data = pd.DataFrame(data=None, index=test_df.index)

In [747]:
# Attach the predictions as the 'buy' column; y_pred_AB is an array, so values
# are assigned positionally in the test frame's row order.
submit_data['buy']  =y_pred_AB

In [751]:
# Write the submission file to the working directory; the index is written as the first column.
submit_data.to_csv(path_or_buf='submission.csv')