# <font color='orange'>sklearn</font>

### ML Algorithms

# <font color='red'>--- First: Regression</font>

#### 1-Linear Regression

In [None]:
# Model Building
# numpy is imported here because this is the first cell that needs `np` and
# no other cell in the notebook imports it.
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Linear Regression: ordinary least-squares baseline on the training split.
lr = LinearRegression()
lr.fit(X_train_preprocessed, y_train)

# A bare expression in the middle of a cell is never displayed, so the
# training score is printed explicitly.
print('Training R2:', lr.score(X_train_preprocessed, y_train))

# Predictions on the held-out split
y_pred = lr.predict(X_test_preprocessed)

# Model Evaluation
print('RMSE:', np.sqrt(mean_squared_error(y_test, y_pred)))
print('R2:', r2_score(y_test, y_pred))
print(lr.coef_)

#### 2-Ridge

In [None]:
# Ridge Regression
from sklearn.linear_model import Ridge

ridge = Ridge()
ridge.fit(X_train_preprocessed, y_train)

# Score the ridge model that was just fitted (not the earlier `lr` model) and
# print it so the value is actually shown.
print('Training R2:', ridge.score(X_train_preprocessed, y_train))

y_pred = ridge.predict(X_test_preprocessed)

# Held-out metrics; `mean_squared_error` / `r2_score` come from the
# linear-regression cell above.
print('RMSE:', np.sqrt(mean_squared_error(y_test, y_pred)))
print('R2:', r2_score(y_test, y_pred))


#### 3-Polynomial Regression

In [None]:
# Polynomial regression: expand the features to degree 2, then fit a ridge
# model on the expanded features.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

poly = make_pipeline(
    PolynomialFeatures(degree=2),
    Ridge(),
)
poly.fit(X_train_preprocessed, y_train)

# Evaluate on the held-out split.
y_pred = poly.predict(X_test_preprocessed)

print('RMSE:', np.sqrt(mean_squared_error(y_test, y_pred)))
print('R2:', r2_score(y_test, y_pred))

In [None]:
def log_transform(x):
    """Return the natural logarithm of ``x``, computed with ``np.log``."""
    logged = np.log(x)
    return logged
# Move both targets into log space so the model is trained and scored on
# log(y).
y_train_log = log_transform(y_train)
y_test_log = log_transform(y_test)

# Linear Regression on the log-transformed target
lr = LinearRegression()
lr.fit(X_train_preprocessed, y_train_log)

# Predictions are in log space, so they are compared against y_test_log.
y_pred = lr.predict(X_test_preprocessed)

print('RMSE:', np.sqrt(mean_squared_error(y_test_log, y_pred)))
print('R2:', r2_score(y_test_log, y_pred))

In [None]:
# Degree-2 polynomial ridge model trained on the log-transformed target.
poly = make_pipeline(PolynomialFeatures(2), Ridge())
poly.fit(X_train_preprocessed, y_train_log)

y_pred = poly.predict(X_test_preprocessed)

# The model predicts log(y), so both metrics must be computed against
# y_test_log; comparing with the raw y_test mixes two different scales.
print('RMSE:', np.sqrt(mean_squared_error(y_test_log, y_pred)))
print('R2:', r2_score(y_test_log, y_pred))

#### DecisionTreeRegressor

In [None]:
from sklearn.tree import DecisionTreeRegressor

# Seed the tree so repeated runs give the same model; scikit-learn permutes
# the candidate features at every split, so an unseeded tree can differ
# between runs. 42 matches the seed used by the other regressors here.
dt = DecisionTreeRegressor(random_state=42)

dt.fit(x_train_processed, y_train)

# `score` returns R^2 for regressors.
print('Training Score: ', dt.score(x_train_processed, y_train))
print('Testing Score: ', dt.score(x_test_processed, y_test))

#### Random Forest Regressor

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Random Forest Regressor: 100 trees, depth capped at 10, seeded for
# reproducibility.
rf_reg = RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42)

# Fitting the data
rf_reg.fit(x_train_processed, y_train)

# Checking the score (R^2 on each split)
print('Training Score: ', rf_reg.score(x_train_processed, y_train))
print('Testing Score: ', rf_reg.score(x_test_processed, y_test))

y_pred = rf_reg.predict(x_test_processed)

# Label corrected from the misspelled 'rmes'.
print('rmse:', np.sqrt(mean_squared_error(y_test, y_pred)))
print('r2:', r2_score(y_test, y_pred))


#### AdaBoost Regressor

In [None]:
from sklearn.ensemble import AdaBoostRegressor

# AdaBoost with 200 boosting rounds, seeded for reproducibility.
ada_reg = AdaBoostRegressor(n_estimators=200, random_state=42)

# Fitting the data
ada_reg.fit(x_train_processed, y_train)

y_pred = ada_reg.predict(x_test_processed)

# Checking the score (R^2 on each split)
print('Training Score: ', ada_reg.score(x_train_processed, y_train))
print('Testing Score: ', ada_reg.score(x_test_processed, y_test))

# Label corrected from the misspelled 'rmes'.
print('rmse:', np.sqrt(mean_squared_error(y_test, y_pred)))
print('r2:', r2_score(y_test, y_pred))

#### GradientBoostingRegressor

In [None]:
from sklearn.ensemble import GradientBoostingRegressor

# Gradient boosting with 200 depth-3 trees, seeded for reproducibility.
grad_reg = GradientBoostingRegressor(n_estimators=200, max_depth=3, random_state=42)

# Fitting the data
grad_reg.fit(x_train_processed, y_train)

y_pred = grad_reg.predict(x_test_processed)

# Checking the score (R^2 on each split)
print('Training Score: ', grad_reg.score(x_train_processed, y_train))
print('Testing Score: ', grad_reg.score(x_test_processed, y_test))

# Label corrected from the misspelled 'rmes'.
print('rmse:', np.sqrt(mean_squared_error(y_test, y_pred)))
print('r2:', r2_score(y_test, y_pred))

#### XGBRegressor

In [None]:
from xgboost import XGBRegressor

# XGBoost regressor with 300 depth-3 trees, seeded for reproducibility.
xgb = XGBRegressor(n_estimators=300, max_depth=3, random_state=42)

# Fitting the data
xgb.fit(x_train_processed, y_train)

y_pred = xgb.predict(x_test_processed)

# Checking the score on each split
print('Training Score: ', xgb.score(x_train_processed, y_train))
print('Testing Score: ', xgb.score(x_test_processed, y_test))

# Label corrected from the misspelled 'rmes'.
print('rmse:', np.sqrt(mean_squared_error(y_test, y_pred)))
print('r2:', r2_score(y_test, y_pred))

#### Voting Regressor

In [None]:
from sklearn.ensemble import VotingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

# Base learners combined by the ensemble: a linear model, a depth-limited
# tree and an RBF support vector regressor.
lin_reg = LinearRegression()
dt_reg = DecisionTreeRegressor(max_depth=10)
svr_reg = SVR(kernel='rbf')

# Voting Regressor built from the three base learners above.
voting_reg = VotingRegressor(
    estimators=[
        ('lr', lin_reg),
        ('dt', dt_reg),
        ('svr', svr_reg),
    ]
)

# Train the ensemble.
voting_reg.fit(x_train_processed, y_train)

# Report the score on both splits.
print('Training Score: ', voting_reg.score(x_train_processed, y_train))
print('Testing Score: ', voting_reg.score(x_test_processed, y_test))

# <font color='red'>--- Second: Classification</font>

#### LogisticRegression

In [None]:
# seaborn is imported here because this is the first cell that draws a
# heatmap; without it `sns` is undefined on a fresh kernel.
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.metrics import f1_score, precision_score, recall_score,accuracy_score

# NOTE: this rebinds `lr`, which held the LinearRegression model in the
# regression section.
lr=LogisticRegression()

lr.fit(x_train,y_train)

pred=lr.predict(x_test)


# f1/precision/recall are called with their defaults, so the target is
# presumably binary -- TODO confirm against the dataset.
print("Classification Report is:\n",classification_report(y_test,pred))
print("\n F1:\n",f1_score(y_test,pred))
print("\n Precision score is:\n",precision_score(y_test,pred))
print("\n Recall score is:\n",recall_score(y_test,pred))
print("\n Confusion Matrix:\n")
sns.heatmap(confusion_matrix(y_test,pred))

#### 2.Support Vector Machine

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, RepeatedStratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import f1_score, precision_score, recall_score

# Candidate hyper-parameters for the support vector classifier.
model = SVC()
kernel = ['poly', 'rbf', 'sigmoid']
C = [50, 10, 1.0, 0.1, 0.01]
gamma = ['scale']
grid = {'kernel': kernel, 'C': C, 'gamma': gamma}

# 10-fold stratified CV repeated 3 times; candidates are ranked by F1 and a
# candidate whose fit raises is scored 0 instead of aborting the search.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
grid_search = GridSearchCV(
    estimator=model,
    param_grid=grid,
    n_jobs=-1,
    cv=cv,
    scoring='f1',
    error_score=0,
)

# `fit` returns the search object itself, so `svm` is the fitted grid search.
svm = grid_search.fit(x_train, y_train)
svm_pred = svm.predict(x_test)

print("Classification Report is:\n", classification_report(y_test, svm_pred))
print('*' * 100)
print("\n F1:\n", f1_score(y_test, svm_pred))
print("\n Precision score is:\n", precision_score(y_test, svm_pred))
print("\n Recall score is:\n", recall_score(y_test, svm_pred))
print("\n Confusion Matrix:\n")
sns.heatmap(confusion_matrix(y_test, svm_pred))

#### DecisionTreeClassifier

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import f1_score, precision_score, recall_score

dt = DecisionTreeClassifier()

# Search space: tree depth, minimum leaf size and split criterion.
params = {
    'max_depth': [5, 10, 20, 25],
    'min_samples_leaf': [10, 20, 50, 100, 120],
    'criterion': ["gini", "entropy"],
}

# 4-fold cross-validated search ranked by accuracy, using all CPU cores.
grid_search = GridSearchCV(
    estimator=dt,
    param_grid=params,
    cv=4,
    n_jobs=-1,
    verbose=1,
    scoring="accuracy",
)

# `fit` returns the search object, so `best_model` is the fitted grid search.
best_model = grid_search.fit(x_train, y_train)

dt_pred = best_model.predict(x_test)

print("Classification Report is:\n", classification_report(y_test, dt_pred))
print('*' * 100)
print("\n F1:\n", f1_score(y_test, dt_pred))
print("\n Precision score is:\n", precision_score(y_test, dt_pred))
print("\n Recall score is:\n", recall_score(y_test, dt_pred))
print("\n Confusion Matrix:\n")
sns.heatmap(confusion_matrix(y_test, dt_pred))

#### RandomForestClassifier

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, RepeatedStratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import f1_score, precision_score, recall_score

# Model and the hyper-parameter values to try. The forest size is fixed at
# 1800 trees; only the per-split feature-subset rule is searched.
rf = RandomForestClassifier()
n_estimators = [1800]
max_features = ['sqrt', 'log2']

# Grid search: 10-fold stratified CV repeated 3 times, ranked by accuracy; a
# candidate whose fit raises is scored 0.
grid = {'n_estimators': n_estimators, 'max_features': max_features}
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=grid,
    n_jobs=-1,
    cv=cv,
    scoring='accuracy',
    error_score=0,
)

# `fit` returns the search object, so `best_model` is the fitted grid search.
best_model = grid_search.fit(x_train, y_train)

rf_pred = best_model.predict(x_test)

print("Classification Report is:\n", classification_report(y_test, rf_pred))
print("\n F1:\n", f1_score(y_test, rf_pred))
print("\n Precision score is:\n", precision_score(y_test, rf_pred))
print("\n Recall score is:\n", recall_score(y_test, rf_pred))
print("\n Confusion Matrix:\n")
sns.heatmap(confusion_matrix(y_test, rf_pred))

#### KNeighborsClassifier

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import f1_score, precision_score, recall_score

# k-nearest-neighbours classifier with k = 3.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(x_train, y_train)

# Predict the held-out split.
knn_pred = knn.predict(x_test)

print("Classification Report is:\n", classification_report(y_test, knn_pred))
print('*' * 100)
print("\n F1:\n", f1_score(y_test, knn_pred))
print("\n Precision score is:\n", precision_score(y_test, knn_pred))
print("\n Recall score is:\n", recall_score(y_test, knn_pred))
print("\n Confusion Matrix:\n")
sns.heatmap(confusion_matrix(y_test, knn_pred))

#### Naive Bayes

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import GaussianNB

# Search 100 log-spaced values of `var_smoothing` between 10**0 and 10**-2.
param_grid_nb = {'var_smoothing': np.logspace(0, -2, num=100)}
nbModel_grid = GridSearchCV(
    estimator=GaussianNB(),
    param_grid=param_grid_nb,
    verbose=1,
    cv=10,
    n_jobs=-1,
)

# `fit` returns the search object, so `best_model` is the fitted grid search.
best_model = nbModel_grid.fit(x_train, y_train)

nb_pred = best_model.predict(x_test)

# The metric helpers used below are imported by earlier classification cells.
print("Classification Report is:\n", classification_report(y_test, nb_pred))
print('*' * 100)
print("\n F1:\n", f1_score(y_test, nb_pred))
print("\n Precision score is:\n", precision_score(y_test, nb_pred))
print("\n Recall score is:\n", recall_score(y_test, nb_pred))
print("\n Confusion Matrix:\n")
sns.heatmap(confusion_matrix(y_test, nb_pred))

#### GradientBoostingClassifier

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# 100 boosted decision stumps (max_depth=1) with learning rate 1.0, seeded.
clf = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=1.0,
    max_depth=1,
    random_state=0,
)
# `fit` returns the estimator itself, so fitting in a separate statement
# leaves `clf` bound to the same fitted model.
clf.fit(x_train, y_train)

pred = clf.predict(x_test)

print("Classification Report is:\n", classification_report(y_test, pred))
print('*' * 100)
print("\n F1:\n", f1_score(y_test, pred))
print("\n Precision score is:\n", precision_score(y_test, pred))
print("\n Recall score is:\n", recall_score(y_test, pred))
print("\n Confusion Matrix:\n")
sns.heatmap(confusion_matrix(y_test, pred))

#### BaggingClassifier

In [None]:
from sklearn.ensemble import BaggingClassifier
# DecisionTreeClassifier is the class this cell instantiates; the regressor
# import is kept only so nothing that relied on it breaks.
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression


# Candidate base learners; only the decision tree is bagged below.
lg_reg = LogisticRegression()
dt=DecisionTreeClassifier(max_depth=10)
naive=GaussianNB()
rf=RandomForestClassifier()

# Bagging Classifier
# Alternative base learners, left disabled:
#bag_cls1 = BaggingClassifier(lg_reg, n_estimators=500, bootstrap=True, random_state=42)
#bag_cls2 = BaggingClassifier(naive, n_estimators=10, bootstrap=True, random_state=42)
# 100 bootstrap-sampled trees, each trained on 80% of the features.
bag_cls3 = BaggingClassifier(dt, n_estimators=100, bootstrap=True, random_state=42,
                           max_features=0.8)


# Fitting the data
bag_cls3.fit(x_train, y_train)

pred=bag_cls3.predict(x_test)
# Checking the score

print("Classification Report is:\n",classification_report(y_test,pred))
print('*'*100)
print("\n F1:\n",f1_score(y_test,pred))
print("\n Precision score is:\n",precision_score(y_test,pred))
print("\n Recall score is:\n",recall_score(y_test,pred))
print("\n Confusion Matrix:\n")
sns.heatmap(confusion_matrix(y_test,pred))

#### XGBClassifier

In [None]:
from xgboost import XGBClassifier

# XGBoost classifier with 300 depth-3 trees, seeded for reproducibility.
# NOTE: this rebinds `xgb`, which held the XGBRegressor in the regression
# section.
xgb = XGBClassifier(
    n_estimators=300,
    max_depth=3,
    random_state=42,
)

# Train on the training split.
xgb.fit(x_train, y_train)

pred = xgb.predict(x_test)

# Report the classification metrics on the held-out split.
print("Classification Report is:\n", classification_report(y_test, pred))
print('*' * 100)
print("\n F1:\n", f1_score(y_test, pred))
print("\n Precision score is:\n", precision_score(y_test, pred))
print("\n Recall score is:\n", recall_score(y_test, pred))
print("\n Confusion Matrix:\n")
sns.heatmap(confusion_matrix(y_test, pred))

#### Neural Network classifier

In [None]:
import warnings

import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
import seaborn as sns
from sklearn.metrics import accuracy_score, confusion_matrix

# NOTE(review): this cell reads X_train / X_test, while the other
# classification cells use x_train / x_test -- confirm which is intended.

classifier = Sequential()

# Adding the input layer and the first hidden layer.
# `units` / `kernel_initializer` are the current names of the Keras 1
# arguments `output_dim` / `init`, which later Keras releases no longer accept.
# input_dim=22 hard-codes the number of input features -- TODO confirm it
# matches X_train.shape[1].
classifier.add(Dense(units=11, kernel_initializer='uniform', activation='relu', input_dim=22))

# Adding the second hidden layer
classifier.add(Dense(units=11, kernel_initializer='uniform', activation='relu'))

# Adding the output layer: a single sigmoid unit for a binary target.
classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))

# Compiling the ANN
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# `epochs` is the current name of the Keras 1 argument `nb_epoch`.
classifier.fit(X_train, y_train, batch_size=10, epochs=100)

# Predicting the Test set results (sigmoid outputs between 0 and 1)
y_pred = classifier.predict(X_test)

# Round the sigmoid outputs to 0/1 labels before building the confusion matrix.
cm = confusion_matrix(y_test, y_pred.round())
sns.heatmap(cm, annot=True, cmap="Blues", fmt="d", cbar=False)

# accuracy score
ac = accuracy_score(y_test, y_pred.round())
print('accuracy of the model: ', ac)

# <font color='blue'>Ahmed Essam</font>