# In [1]:
# Feature engineering, modeling, and evaluation for the Black Friday dataset

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Folder holding train.csv and test.csv. The original author's Downloads folder
# is kept as the fallback; set BLACK_FRIDAY_DATA_DIR to run on another machine.
data_dir = os.environ.get("BLACK_FRIDAY_DATA_DIR", r"C:\Users\User\Downloads")
if not os.path.isdir(data_dir):
    # Fail with an actionable message instead of a bare errno from chdir.
    raise FileNotFoundError(
        f"Data directory not found: {data_dir!r}. Set the BLACK_FRIDAY_DATA_DIR "
        "environment variable to the folder containing train.csv and test.csv."
    )

# The working directory is still switched so the relative file names below
# (and any later relative paths) resolve as they did before.
os.chdir(data_dir)

df_train = pd.read_csv("train.csv")
df_test = pd.read_csv("test.csv")

# Stack both files into one frame so every feature transformation below is
# applied to all rows in a single pass.
df = pd.concat([df_train, df_test], ignore_index=True)
df.info()

#different Feature engineering techniques
#===========================================

# Drop the identifier columns; they are not used as model features.
df = df.drop(columns=['User_ID', 'Product_ID'])

# Gender: dummy-encode and drop the first level, then store the remaining
# indicator frame back under the original column name.
gender_indicator = pd.get_dummies(df['Gender'], drop_first=True)
df['Gender'] = gender_indicator

# Age: replace each age bracket label with its 1-based position in the
# ordered list of brackets ('0-17' -> 1, ..., '55+' -> 7).
age_brackets = ['0-17', '18-25', '26-35', '36-45', '46-50', '51-55', '55+']
age_rank = {bracket: rank for rank, bracket in enumerate(age_brackets, start=1)}
df['Age'] = df['Age'].map(age_rank)



# Dealing with City Category feature
df.info()

from sklearn.preprocessing import OneHotEncoder

# drop='first' removes the reference category, so the encoder returns one
# indicator column for each remaining category. That 2-D result cannot be stored
# in the single 'City_Category' column; it is expanded into named dummy columns
# and the original text column is removed.
encoder = OneHotEncoder(drop='first', sparse_output=False)
city_encoded = encoder.fit_transform(df[['City_Category']])
city_dummies = pd.DataFrame(
    city_encoded,
    columns=encoder.get_feature_names_out(['City_Category']),
    index=df.index,  # keep rows aligned with df for the concat below
)
df = pd.concat([df.drop(columns='City_Category'), city_dummies], axis=1)
df.info()

# Dealing with null values of Product Category 2 and Product Category 3:
# each column's missing entries are replaced by that column's most frequent value.
for category_column in ('Product_Category_2', 'Product_Category_3'):
    most_frequent = df[category_column].mode()[0]
    df[category_column] = df[category_column].fillna(most_frequent)

# Dealing with Stay In Current City Years feature
df.info()

# '+' is a regex quantifier, and the default of `regex` differs between pandas
# versions. regex=False forces a literal replacement so the '+' suffix is
# stripped the same way everywhere before the cast to int.
stay_years = df['Stay_In_Current_City_Years'].str.replace('+', '', regex=False)
df['Stay_In_Current_City_Years'] = stay_years.astype(int)

# Dealing with missing values in the target column
# Rows without a Purchase value have no label (presumably the rows that came
# from test.csv -- confirm). Filling them with the mean would fabricate targets
# that the models are then trained and scored on, so those rows are set aside
# instead of being imputed.
missing_target = df['Purchase'].isna()

# Unlabeled feature rows, kept for later prediction if needed.
df_unlabeled = df.loc[missing_target].drop(columns='Purchase')

# Only rows with a real Purchase value continue to scaling and modeling.
df = df.loc[~missing_target].reset_index(drop=True)

# Train/test split and feature scaling
df.info()
x = df.drop('Purchase', axis=1)
y = df['Purchase']

# Split first so the scaler never sees the hold-out rows.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8, random_state=10)

# Fit the scaler on the training rows only and reuse its statistics for the
# test rows; fitting on all rows would leak hold-out information into training.
# Note: `x` itself stays the unscaled feature frame.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

#data is ready for modeling now
#==================================

from sklearn.linear_model import LassoCV
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error , mean_squared_error , r2_score

# Candidate regressors for the grid search below. Each entry is
# (display name, estimator instance, hyper-parameter grid).
lasso_grid = {
    'max_iter': [1000, 5000, 10000],
    'tol': [1e-4, 1e-3, 1e-2],
}

svr_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['rbf', 'poly', 'sigmoid', 'linear'],
    'gamma': ['scale', 'auto', 0.1, 1],
}

forest_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
}

models = [
    ('Lasso', LassoCV(), lasso_grid),
    ('Support Vector Machine', SVR(), svr_grid),
    ('Random Forest', RandomForestRegressor(), forest_grid),
]

# Run a 5-fold grid search for every candidate and keep the best one.
# The scorer is 'neg_mean_squared_error', so every score is <= 0 and a larger
# (closer to zero) value is better. The running best therefore starts at
# negative infinity; starting at 0 would never be beaten and no model would
# ever be selected.
best_model = None
best_parameters = None
best_score = float('-inf')

for name, model, model_param in models:
    grid_search = GridSearchCV(model, model_param, cv=5, scoring='neg_mean_squared_error', n_jobs=-1, error_score='raise')
    grid_search.fit(x_train, y_train)

    print(f"model : {name} ---> score : {grid_search.best_score_}")
    print("===============================")

    if grid_search.best_score_ > best_score:
        # Keep the estimator refit with the winning parameters, not the
        # unfitted prototype that was handed to the search.
        best_model = grid_search.best_estimator_
        best_parameters = grid_search.best_params_
        best_score = grid_search.best_score_

print(f"best model : {best_model}")
print(f"best parameters : {best_parameters}")
print(f"best score : {best_score}")







# Output captured from the notebook run:
# FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\User\\Downloads'