# 1.1 Linear regression

In [3]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
import numpy as np

data=load_iris()
X=data.data
y=data.target

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

model=LinearRegression()
model.fit(X_train,y_train)

y_pred=model.predict(X_test)

print('MAS:',mean_absolute_error(y_test,y_pred))
print('MSE:',mean_squared_error(y_test,y_pred))
print('RMSE:',np.sqrt(mean_squared_error(y_test,y_pred)))
print('R2:',r2_score(y_test,y_pred))


MAS: 0.14637694965308523
MSE: 0.037113794407976845
RMSE: 0.19264940801356448
R2: 0.9468960016420045


# 1.2 Logistic regression

In [4]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score

data=load_breast_cancer()
X=data.data
y=data.target

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

scalar=StandardScaler()
X_train_scalar=scalar.fit_transform(X_train)
X_test_scalar=scalar.transform(X_test)

model=LogisticRegression()
model.fit(X_train_scalar,y_train)

y_pred=model.predict(X_test_scalar)

print('Accuracy:',accuracy_score(y_test,y_pred))
print('precision:',precision_score(y_test,y_pred))
print('recall:',recall_score(y_test,y_pred))
print('F1-score:',f1_score(y_test,y_pred))

Accuracy: 0.9736842105263158
precision: 0.9722222222222222
recall: 0.9859154929577465
F1-score: 0.9790209790209791


# 1.3 Decision tree classifier

In [5]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score

data=load_wine()
X=data.data
y=data.target

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

model=DecisionTreeClassifier()
model.fit(X_train,y_train)

y_pred=model.predict(X_test)

print('accuracy:',accuracy_score(y_test,y_pred))
print('precision:',precision_score(y_test,y_pred,average='weighted'))
print('recall:',recall_score(y_test,y_pred,average='weighted'))
print('F1-score:',f1_score(y_test,y_pred,average='weighted'))

accuracy: 0.9444444444444444
precision: 0.9513888888888888
recall: 0.9444444444444444
F1-score: 0.9448559670781892


# 1.4 Random forest

In [6]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score

data=load_breast_cancer()
X=data.data
y=data.target

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

model=RandomForestClassifier()
model.fit(X_train,y_train)

y_pred=model.predict(X_test)

print('accuracy:',accuracy_score(y_test,y_pred))
print('precision:',precision_score(y_test,y_pred,average='macro'))
print('recall:',recall_score(y_test,y_pred,average='macro'))
print('f1:',f1_score(y_test,y_pred,average='macro'))


accuracy: 0.9649122807017544
precision: 0.9672569328433009
recall: 0.9580740255486406
f1: 0.9623015873015872


# 1.5 SVM

In [None]:
from sklearn.svm import SVC

data=load_wine()
X=data.data
y=data.target

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

model = SVC()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print('Accuracy:', accuracy_score(y_test, y_pred))
print('Precision:', precision_score(y_test, y_pred, average='weighted'))
print('Recall:', recall_score(y_test, y_pred, average='weighted'))
print('F1-score:', f1_score(y_test, y_pred, average='weighted'))


Accuracy: 0.8055555555555556
Precision: 0.801058201058201
Recall: 0.8055555555555556
F1-score: 0.80242656449553


# 1.6 K-nearest neighbors

In [8]:
from sklearn.neighbors import KNeighborsClassifier

data=load_wine()
X=data.data
y=data.target

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

model = SVC()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print('Accuracy:', accuracy_score(y_test, y_pred))
print('Precision:', precision_score(y_test, y_pred, average='weighted'))
print('Recall:', recall_score(y_test, y_pred, average='weighted'))
print('F1-score:', f1_score(y_test, y_pred, average='weighted'))



Accuracy: 0.8055555555555556
Precision: 0.801058201058201
Recall: 0.8055555555555556
F1-score: 0.80242656449553


# 1.7 Naive Bayes 

In [9]:
from sklearn.naive_bayes import GaussianNB

data=load_wine()
X=data.data
y=data.target

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

model = SVC()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print('Accuracy:', accuracy_score(y_test, y_pred))
print('Precision:', precision_score(y_test, y_pred, average='weighted'))
print('Recall:', recall_score(y_test, y_pred, average='weighted'))
print('F1-score:', f1_score(y_test, y_pred, average='weighted'))



Accuracy: 0.8055555555555556
Precision: 0.801058201058201
Recall: 0.8055555555555556
F1-score: 0.80242656449553


# 2.1 k-means clustering

In [10]:
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

data=load_wine()
X=data.data

model=KMeans(n_clusters=3,random_state=42)
model.fit(X)

print('silhouette score:',silhouette_score(X,model.labels_))


silhouette score: 0.5595823478987213




# 2.2 Hierarchial clustering

In [11]:
from sklearn.cluster import AgglomerativeClustering

data=load_wine()
X=data.data

model=AgglomerativeClustering(n_clusters=3)
model.fit(X)

print('silhouette_score:',silhouette_score(X,model.labels_))


silhouette_score: 0.5644796401732068
