### **AdaBoost model Implementation**

In [22]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split as tts
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
import pandas as pd
import numpy as np

data_train = "../classification_dataset/classification_train.csv"
data_train = pd.read_csv(data_train)
X = np.array(data_train.iloc[:,0:2])
y = np.array(data_train.iloc[:,-1])
X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2, random_state=58)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.fit_transform(X_train)
X_test_std = sc.transform(X_test)

dtree = DecisionTreeClassifier(criterion='entropy', max_depth=1, random_state=1)
adbclassifier = AdaBoostClassifier(base_estimator=dtree,
                                   n_estimators=100,
                                   learning_rate=0.0005,
                                   algorithm = 'SAMME',
                                   random_state=1)


# clf = AdaBoostClassifier(n_estimators=10, algorithm='SAMME').fit(X_train, y_train)
adbclassifier.fit(X_train_std, y_train)
train_err = (adbclassifier.predict(X) != y).mean()
print("Training error: ", train_err)
print('Model test Score: %.3f, ' %adbclassifier.score(X_test_std, y_test),
      'Model training Score: %.3f' %adbclassifier.score(X_train, y_train))


data_test = "../classification_dataset/classification_test.csv"
data = pd.read_csv(data_test)
X = np.array(data[['x_1','x_2']])

Y = adbclassifier.predict(X)
predicted_Y = pd.DataFrame(Y)
print('=============Saving prediction===================================="')
predicted_Y.to_csv('predicted_adaBoost_sklearn.csv', index = False, header = False)
print('=============Saved prediction===================================="')

Training error:  0.059
Model test Score: 0.940,  Model training Score: 0.941


### **SVM model implementation**

In [35]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split as tts
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np

# data loading 
data_train = "../classification_dataset/classification_train.csv"
df_data_train = pd.read_csv(data_train)
X = np.array(df_data_train.iloc[:,0:2])
y = np.array(df_data_train.iloc[:,-1])

# data splitting
X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2, random_state=1)

# Feature Scaling
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

# Training a SVM classifier
svm = SVC(kernel= 'rbf', random_state=1, C=0.1)
svm.fit(X_train, y_train)
 
# Mode performance
y_pred = svm.predict(X_test)
print('Accuracy: %.3f' %accuracy_score(y_test, y_pred))

df_predicted_Y = pd.DataFrame(predicted_val_y)
# df_predicted_Y

# prediction on test data
data_test = "../classification_dataset/classification_test.csv"
data = pd.read_csv(data_test)
X = np.array(data[['x_1','x_2']])
Y = svm.predict(X)
predicted_Y = pd.DataFrame(Y)
print('=============Saving prediction===================================="')
predicted_Y.to_csv('predicted_svm_sklearn.csv', index = False, header = False)
print('=============Saved prediction===================================="')


Accuracy: 0.955


### **Regression model**

In [18]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split as tts

data_train = "../regression_dataset/regression_train.csv"
data_train = pd.read_csv(data_train)
X = np.array(data_train.iloc[:,0:1])
y = np.array(data_train.iloc[:,:-1])

# data splitting
X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2, random_state=1)

# Feature Scaling
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

reg = LinearRegression().fit(X_train, y_train)

print(reg.coef_)
print(reg.intercept_)
print('Model test Score: %.3f, ' %reg.score(X_test, y_test),
      'Model training Score: %.3f' %reg.score(X_train, y_train))


data_eval = "../regression_dataset/regression_val.csv"
data = pd.read_csv(data_eval)
x_val = np.array(data.iloc[:,0:1])
y_val = np.array(data.iloc[:,:-1])
print('Model Evaluation Score: %.3f, ' %reg.score(x_val, y_val))
predicted_val_y =  reg.predict(x_val)
predicted_Y = pd.DataFrame(predicted_val_y)
# predicted_Y
# data_test = "../regression_dataset/regression_test.csv"
# data = pd.read_csv(data_test)
# X = np.array(data[['x']])

# Y = svm.predict(X)
# predicted_Y = pd.DataFrame(Y)
# print('=============Saving prediction===================================="')
# predicted_Y.to_csv('predicted_linear_regression_sklearn.csv', index = False, header = False)
# print('=============Saved prediction===================================="')

[[1.]]
[2.77555756e-17]
Model test Score: 1.000,  Model training Score: 1.000
Model Evaluation Score: 1.000, 


Unnamed: 0,0
0,8.739428
1,29.168810
2,-44.642598
3,-19.634700
4,-44.507558
...,...
995,-44.577870
996,-27.914020
997,38.165778
998,6.475633
