# Code 1: Bank Marketing

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [31]:

# Load the data
train_df = pd.read_csv('termdeposit_train.csv')
test_df = pd.read_csv('termdeposit_test.csv')


In [32]:
train_df.head()

Unnamed: 0,ID,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,subscribed
0,26110,56,admin.,married,unknown,no,1933,no,no,telephone,19,nov,44,2,-1,0,unknown,no
1,40576,31,unknown,married,secondary,no,3,no,no,cellular,20,jul,91,2,-1,0,unknown,no
2,15320,27,services,married,secondary,no,891,yes,no,cellular,18,jul,240,1,-1,0,unknown,no
3,43962,57,management,divorced,tertiary,no,3287,no,no,cellular,22,jun,867,1,84,3,success,yes
4,29842,31,technician,married,secondary,no,119,yes,no,cellular,4,feb,380,1,-1,0,unknown,no


In [33]:
test_df.head()

Unnamed: 0,ID,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome
0,38441,32,services,married,secondary,no,118,yes,no,cellular,15,may,20,6,-1,0,unknown
1,40403,78,retired,divorced,primary,no,2787,no,no,telephone,1,jul,372,1,-1,0,unknown
2,3709,31,self-employed,single,tertiary,no,144,yes,no,unknown,16,may,676,1,-1,0,unknown
3,37422,57,services,single,primary,no,3777,yes,no,telephone,13,may,65,2,-1,0,unknown
4,12527,45,blue-collar,divorced,secondary,no,-705,no,yes,unknown,3,jul,111,1,-1,0,unknown


In [34]:

# Preprocess the data
# Drop the ID column as it is not useful for prediction
train_df = train_df.drop('ID', axis=1)
test_df = test_df.drop('ID', axis=1)


In [35]:
train_df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,subscribed
0,56,admin.,married,unknown,no,1933,no,no,telephone,19,nov,44,2,-1,0,unknown,no
1,31,unknown,married,secondary,no,3,no,no,cellular,20,jul,91,2,-1,0,unknown,no
2,27,services,married,secondary,no,891,yes,no,cellular,18,jul,240,1,-1,0,unknown,no
3,57,management,divorced,tertiary,no,3287,no,no,cellular,22,jun,867,1,84,3,success,yes
4,31,technician,married,secondary,no,119,yes,no,cellular,4,feb,380,1,-1,0,unknown,no


In [36]:
test_df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome
0,32,services,married,secondary,no,118,yes,no,cellular,15,may,20,6,-1,0,unknown
1,78,retired,divorced,primary,no,2787,no,no,telephone,1,jul,372,1,-1,0,unknown
2,31,self-employed,single,tertiary,no,144,yes,no,unknown,16,may,676,1,-1,0,unknown
3,57,services,single,primary,no,3777,yes,no,telephone,13,may,65,2,-1,0,unknown
4,45,blue-collar,divorced,secondary,no,-705,no,yes,unknown,3,jul,111,1,-1,0,unknown


In [37]:

# Encode categorical variables
le = LabelEncoder()
categorical_columns = ['job', 'marital', 'education', 'default', 'housing', 'loan', 'contact', 'month', 'day', 'poutcome']
for column in categorical_columns:
    train_df[column] = le.fit_transform(train_df[column])
    test_df[column] = le.transform(test_df[column])


In [38]:
train_df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,subscribed
0,56,0,1,3,0,1933,0,0,1,18,9,44,2,-1,0,3,no
1,31,11,1,1,0,3,0,0,0,19,5,91,2,-1,0,3,no
2,27,7,1,1,0,891,1,0,0,17,5,240,1,-1,0,3,no
3,57,4,0,2,0,3287,0,0,0,21,6,867,1,84,3,2,yes
4,31,9,1,1,0,119,1,0,0,3,3,380,1,-1,0,3,no


In [39]:
test_df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome
0,32,7,1,1,0,118,1,0,0,14,8,20,6,-1,0,3
1,78,5,0,0,0,2787,0,0,1,0,5,372,1,-1,0,3
2,31,6,2,2,0,144,1,0,2,15,8,676,1,-1,0,3
3,57,7,2,0,0,3777,1,0,1,12,8,65,2,-1,0,3
4,45,1,0,1,0,-705,0,1,2,2,5,111,1,-1,0,3


In [40]:

# Split the data into training and testing sets
y_train = train_df['subscribed']
X_train = train_df.drop('subscribed', axis=1)


In [41]:

# Train the model
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)


In [42]:

# Make predictions on the test set
test_df['subscribed'] =  clf.predict(test_df)


In [None]:

# Evaluate the model
print("Accuracy:", accuracy_score(test_df['', y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

In [43]:
test_df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,subscribed
0,32,7,1,1,0,118,1,0,0,14,8,20,6,-1,0,3,no
1,78,5,0,0,0,2787,0,0,1,0,5,372,1,-1,0,3,no
2,31,6,2,2,0,144,1,0,2,15,8,676,1,-1,0,3,no
3,57,7,2,0,0,3777,1,0,1,12,8,65,2,-1,0,3,no
4,45,1,0,1,0,-705,0,1,2,2,5,111,1,-1,0,3,no


Above is the test.csv file with subscribed or not mentioned in the code.

# Code 2: Restaurant Food Cost

In [44]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression


In [50]:

# Load the training data
train_df = pd.read_excel('Data_Train.xlsx')

# Load the test data
test_df = pd.read_excel('Data_Test.xlsx')

# Drop the TITLE column as it is not useful for prediction
test_df = test_df.drop('TITLE', axis=1)

In [51]:
train_df.head()

Unnamed: 0,TITLE,RESTAURANT_ID,CUISINES,TIME,CITY,LOCALITY,RATING,VOTES,COST
0,CASUAL DINING,9438,"Malwani, Goan, North Indian","11am – 4pm, 7:30pm – 11:30pm (Mon-Sun)",Thane,Dombivali East,3.6,49 votes,1200
1,"CASUAL DINING,BAR",13198,"Asian, Modern Indian, Japanese",6pm – 11pm (Mon-Sun),Chennai,Ramapuram,4.2,30 votes,1500
2,CASUAL DINING,10915,"North Indian, Chinese, Biryani, Hyderabadi","11am – 3:30pm, 7pm – 11pm (Mon-Sun)",Chennai,Saligramam,3.8,221 votes,800
3,QUICK BITES,6346,"Tibetan, Chinese",11:30am – 1am (Mon-Sun),Mumbai,Bandra West,4.1,24 votes,800
4,DESSERT PARLOR,15387,Desserts,11am – 1am (Mon-Sun),Mumbai,Lower Parel,3.8,165 votes,300


In [52]:
test_df.head()

Unnamed: 0,RESTAURANT_ID,CUISINES,TIME,CITY,LOCALITY,RATING,VOTES
0,4085,"North Indian, Chinese, Mughlai, Kebab",12noon – 12midnight (Mon-Sun),Noida,Sector 18,4.3,564 votes
1,12680,"South Indian, Fast Food, Pizza, North Indian",7am – 12:30AM (Mon-Sun),Mumbai,Grant Road,4.2,61 votes
2,1411,"North Indian, Seafood, Biryani, Chinese",11am – 11:30pm (Mon-Sun),Mumbai,Marine Lines,3.8,350 votes
3,204,Biryani,"9am – 10pm (Mon, Wed, Thu, Fri, Sat, Sun), 10:...",Faridabad,NIT,3.8,1445 votes
4,13453,"South Indian, Kerala",11am – 10pm (Mon-Sun),Kochi,Kaloor,3.6,23 votes


In [None]:
import pandas as pd
import numpy as np
import_encoders as ce
from sklearn.model_ import train_test_split
from sklearn.ensemble import RandomstRegressor

# Load the training data
train_df = pd.read_csv('train.csv')

# Load the test data
test_df = pd.read_csv('test.csv')

# Drop the TITLE column as it is not useful for prediction
test_df = test_df.drop('TITLE', axis=1)

# Perform one-hot encoding on the categorical columns
encoder = ce.OrdinalEncoder()
X = encoder.fit_transform(train_df.drop('COST', axis=1))
X_test = encoder.transform(test_df)

# Split the training data into training and validation sets
y_train = train_df['COST']
X_train, X_val, y_train, y_val = train_test_split(X, y_train, test_size=0.2, random_state=42)

# Train the model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the validation set
val_pred = model.predict(X_val)

# Evaluate the model on the validation set
print("RMSE:", np.sqrt(mean_squared_error(y_val, val_pred)))

# Make predictions on the test data
test_pred = model.predict(X_test)

# Save the predictions to a new CSV file
submission_df = pd.DataFrame({'COST': test_pred})
submission_df.to_csv('predictions.csv', index=False)

In [None]:
import pandas as pd
import numpy as np
import category_encoders as ce
# from category_encoders import OrdinalEncoder
from sklearn.model_ import train_test_split
from sklearn.ensemble import RandomstRegressor

# Load the training data
train_df = pd.read_excel('Data_Train.xlsx')

# Load the test data
test_df = pd.read_excel('Data_Test.xlsx')

# Drop the TITLE column as it is not useful for prediction
test_df = test_df.drop('TITLE', axis=1)

# Perform one-hot encoding on the categorical columns
encoder = ce.OrdinalEncoder()
X = encoder.fit_transform(train_df.drop('COST', axis=1))
X_test = encoder.transform(test_df)

# Split the training data into training and validation sets
y_train = train_df['COST']
X_train, X_val, y_train, y_val = train_test_split(X, y_train, test_size=0.2, random_state=42)

# Train the model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the validation set
val_pred = model.predict(X_val)

# Evaluate the model on the validation set
print("RMSE:", np.sqrt(mean_squared_error(y_val, val_pred)))

# Make predictions on the test data
test_pred = model.predict(X_test)

# Save the predictions to a new CSV file
submission_df = pd.DataFrame({'COST': test_pred})
submission_df.to_csv('predictions.csv', index=False)