-
Notifications
You must be signed in to change notification settings - Fork 0
/
task01.py
92 lines (65 loc) · 2.51 KB
/
task01.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn.model_selection as skl_ms
import sklearn.linear_model as skl_lm
import sklearn.metrics as skl_m
import math
# Species labels looked up in the 'Species' column. Each one becomes a 0/1
# indicator column in species_to_int() and a model feature in prepare().
# Must stay a list: prepare() concatenates it with another list of columns.
types = [
    'Bream',
    'Roach',
    'Whitefish',
    'Parkki',
    'Perch',
    'Pike',
    'Smelt',
]
def species_to_int(data):
    """Return a copy of *data* with one 0/1 indicator column per known species.

    For every name in the module-level ``types`` list, a column with that name
    is added to the copy. It holds 1 where ``data['Species']`` equals the name
    and 0 everywhere else. The frame passed in is not modified.
    """
    newdata = data.copy()
    species = data['Species']
    for name in types:
        # Vectorised comparison instead of a per-row lambda that closes over
        # the loop variable; the explicit int64 keeps the column integer-typed
        # even when the frame is empty.
        newdata[name] = (species == name).astype("int64")
    return newdata
def print_pairplot(data):
    """Show a seaborn pair plot of the Weight, Length1-3, Height and Width columns."""
    columns = ["Weight", "Length1", "Length2", "Length3", "Height", "Width"]
    numeric_view = data[columns]
    sns.pairplot(numeric_view)
    plt.show()
def prepare(data, test_size=0.2, random_state=5):
    """Split *data* into train/test sets and extract features and target.

    The features are the ``Length1``, ``Height`` and ``Width`` columns plus one
    column per entry of the module-level ``types`` list, so *data* is expected
    to have been passed through ``species_to_int`` already. The target is the
    ``Weight`` column.

    Args:
        data: frame holding the feature columns and ``Weight``.
        test_size: share of rows put into the test split, forwarded to
            ``train_test_split``.
        random_state: seed forwarded to ``train_test_split`` so the split is
            reproducible.

    Returns:
        ``(X, y, X_test, y_test)`` where the ``X`` values come from ``.values``
        of the selected feature columns and the ``y`` values are the
        ``Weight`` columns of the respective split.
    """
    # Single source of truth for the feature set, shared by both splits.
    feature_columns = ["Length1", "Height", "Width"] + types

    teach_data, test_data = skl_ms.train_test_split(
        data, test_size=test_size, random_state=random_state
    )

    X = teach_data[feature_columns].values
    y = teach_data['Weight']
    X_test = test_data[feature_columns].values
    y_test = test_data['Weight']
    return X, y, X_test, y_test
def linear_regression(X, y, X_test, y_test):
    """Fit a plain LinearRegression on (X, y) and return its RMSE on the test data."""
    model = skl_lm.LinearRegression()
    model.fit(X, y)
    predictions = model.predict(X_test)
    mse = skl_m.mean_squared_error(y_test, predictions)
    return math.sqrt(mse)
def ridge(X, y, X_test, y_test, a=1.):
    """Fit a Ridge model with ``alpha=a`` on (X, y) and return its RMSE on the test data."""
    model = skl_lm.Ridge(alpha=a)
    model.fit(X, y)
    predictions = model.predict(X_test)
    mse = skl_m.mean_squared_error(y_test, predictions)
    return math.sqrt(mse)
def lasso(X, y, X_test, y_test, a=1.):
    """Fit a Lasso model on (X, y) and return its RMSE on the test data.

    Args:
        X, y: training features and target.
        X_test, y_test: held-out features and target used for scoring.
        a: regularisation strength passed to ``Lasso(alpha=...)``. The default
            of 1.0 is Lasso's own default, so calls without ``a`` behave as
            before; the parameter mirrors the one on ``ridge``.

    Returns:
        Root of the mean squared error between ``y_test`` and the model's
        predictions for ``X_test``.
    """
    fitted_regression = skl_lm.Lasso(alpha=a).fit(X, y)
    predictions = fitted_regression.predict(X_test)
    return math.sqrt(skl_m.mean_squared_error(y_test, predictions))
def print_plot(x, y):
    """Draw *y* against *x* with ``plt.plot`` and display the figure."""
    plt.plot(x, y)
    # Hand control to matplotlib to show the figure.
    plt.show()
def start(path="Fish.csv"):
    """Run the regression comparison and print the RMSE of each model.

    Loads the CSV at *path*, adds the species indicator columns, and evaluates
    LinearRegression, Ridge and Lasso twice: once on all rows and once on the
    rows whose ``Weight`` is at or below the 0.95 quantile (the "s" lines).
    It then sweeps the Ridge alpha from 0 to 1 in steps of 0.05, prints the
    lowest RMSE found and plots RMSE against alpha.

    Args:
        path: location of the CSV file; defaults to ``"Fish.csv"`` in the
            current working directory.
    """
    data = pd.read_csv(path)
    # Optional visual inspection of the raw columns:
    # print_pairplot(data)
    data = species_to_int(data)
    X, y, X_test, y_test = prepare(data)

    # Same pipeline on the rows at or below the 0.95 weight quantile.
    selected_data = data[data['Weight'] <= data["Weight"].quantile(q=0.95)]
    X_s, y_s, X_test_s, y_test_s = prepare(selected_data)

    print("LR \tRMSE: \t", linear_regression(X, y, X_test, y_test))
    print("LR s \tRMSE: \t", linear_regression(X_s, y_s, X_test_s, y_test_s))
    print("Ridge \tRMSE: \t", ridge(X, y, X_test, y_test))
    print("Rid s \tRMSE: \t", ridge(X_s, y_s, X_test_s, y_test_s))
    print("Lasso \tRMSE: \t", lasso(X, y, X_test, y_test))
    print("Las s \tRMSE: \t", lasso(X_s, y_s, X_test_s, y_test_s))

    # Build the alpha grid from integer steps. Repeatedly adding 0.05 to a
    # float accumulates rounding error, so the running sum can overshoot 1.0
    # and silently drop the endpoint from the sweep.
    steps = 20
    alphas = [step / steps for step in range(steps + 1)]
    results = [ridge(X, y, X_test, y_test, alpha) for alpha in alphas]

    print("Opt Rid \tRMSE: \t", min(results))
    print_plot(alphas, results)
# Run the experiment only when this file is executed as a script.
if __name__ == "__main__":
    start()