<a href="https://colab.research.google.com/github/Zaides01/GP3/blob/main/ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model  import Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.neighbors  import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import accuracy_score

### Чтение файла

In [None]:
# Read the dataset from disk, then discard every row that is missing a value
# in the `text_lemmas` or `rating` column (both columns are used further down).
df = pd.read_csv('df_maps.csv')
df = df.dropna(subset=['text_lemmas', 'rating'])

### Разбиение данных на обучающую и тестовую выборки

In [None]:
# Hold out 20% of the rows as the test set. The split is stratified on the
# rating column and seeded, so it is reproducible between runs. The target is
# cast to float (the models fitted later in the notebook are regressors).
X_train, X_test, y_train, y_test = train_test_split(
    df['text_lemmas'],
    df['rating'].astype(float),
    test_size=0.2,
    random_state=42,
    stratify=df['rating'],
)

### Пайплайн

In [None]:
# Text -> dense feature vectors, in three stages:
#   1. TF-IDF over unigrams and bigrams, capped at 6000 features; only tokens
#      made of Cyrillic letters (а-я, ё) are kept.
#   2. Truncated SVD down to 200 components.
#   3. Scaling to unit variance without centering (with_mean=False).
text_pipe = Pipeline(steps=[
    (
        'tfidf',
        TfidfVectorizer(
            max_features=6000,
            ngram_range=(1, 2),
            token_pattern=r'\b[а-яё]+\b',
        ),
    ),
    ('svd', TruncatedSVD(n_components=200, random_state=42)),
    ('sc', StandardScaler(with_mean=False)),
])

# The pipeline is fitted on the training texts only; the fitted pipeline is
# then applied to both the training and the test texts.
text_pipe.fit(X_train)
X_train_vec = text_pipe.transform(X_train)
X_test_vec = text_pipe.transform(X_test)

### Сравниваем 5 моделей МО



In [None]:
# Model 1: ridge regression (alpha=1.0) on the vectorised texts.
ridge = Ridge(alpha=1.0, random_state=42)
ridge.fit(X_train_vec, y_train)

# Continuous predictions are scored with the regression metrics below.
y_pred_ridge = ridge.predict(X_test_vec)
# For the accuracy score the predictions are rounded to the nearest integer
# and clamped to the range [1, 5] (presumably the rating scale).
y_round_ridge = np.clip(np.rint(y_pred_ridge), 1, 5)

print("mae", mean_absolute_error(y_test, y_pred_ridge))
# mean_squared_error returns the MSE by default; take the square root so the
# value printed under the "rmse" label really is the root mean squared error.
print("rmse", np.sqrt(mean_squared_error(y_test, y_pred_ridge)))
print("R^2", r2_score(y_test, y_pred_ridge))
print("accuracy", accuracy_score(y_test, y_round_ridge))

mae 0.5244307923034988
rmse 0.5555303398046914
R^2 0.5400346385945249
accuracy 0.6599268511489891


In [None]:
# Model 2: a single regression tree, limited to depth 20 with at least
# 5 samples per leaf.
tree = DecisionTreeRegressor(max_depth=20, min_samples_leaf=5, random_state=42)
tree.fit(X_train_vec, y_train)

# Continuous predictions are scored with the regression metrics below.
y_pred_tree = tree.predict(X_test_vec)
# For the accuracy score the predictions are rounded to the nearest integer
# and clamped to the range [1, 5] (presumably the rating scale).
y_round_tree = np.clip(np.rint(y_pred_tree), 1, 5)

print("mae", mean_absolute_error(y_test, y_pred_tree))
# mean_squared_error returns the MSE by default; take the square root so the
# value printed under the "rmse" label really is the root mean squared error.
print("rmse", np.sqrt(mean_squared_error(y_test, y_pred_tree)))
print("R^2", r2_score(y_test, y_pred_tree))
print("accuracy", accuracy_score(y_test, y_round_tree))

mae 0.4560659642476801
rmse 0.913062683168444
R^2 0.24400671402199547
accuracy 0.7336967773100546


In [None]:
# Model 3: an ensemble of 50 extremely randomised trees, trained on all
# available CPU cores (n_jobs=-1).
extra = ExtraTreesRegressor(n_estimators=50, n_jobs=-1, random_state=42)
extra.fit(X_train_vec, y_train)

# Continuous predictions are scored with the regression metrics below.
y_pred_extra = extra.predict(X_test_vec)
# For the accuracy score the predictions are rounded to the nearest integer
# and clamped to the range [1, 5] (presumably the rating scale).
y_round_extra = np.clip(np.rint(y_pred_extra), 1, 5)

print("mae", mean_absolute_error(y_test, y_pred_extra))
# mean_squared_error returns the MSE by default; take the square root so the
# value printed under the "rmse" label really is the root mean squared error.
print("rmse", np.sqrt(mean_squared_error(y_test, y_pred_extra)))
print("R^2", r2_score(y_test, y_pred_extra))
print("accuracy", accuracy_score(y_test, y_round_extra))

mae 0.4033436845858348
rmse 0.5049247398768585
R^2 0.5819348218826781
accuracy 0.7292112345593816


In [None]:
# Model 4: k-nearest-neighbours regression with k=10; neighbours are weighted
# by distance (weights='distance').
knn = KNeighborsRegressor(n_neighbors=10, weights='distance', n_jobs=-1)
knn.fit(X_train_vec, y_train)

# Continuous predictions are scored with the regression metrics below.
y_pred_knn = knn.predict(X_test_vec)
# For the accuracy score the predictions are rounded to the nearest integer
# and clamped to the range [1, 5] (presumably the rating scale).
y_round_knn = np.clip(np.rint(y_pred_knn), 1, 5)

print("mae", mean_absolute_error(y_test, y_pred_knn))
# mean_squared_error returns the MSE by default; take the square root so the
# value printed under the "rmse" label really is the root mean squared error.
print("rmse", np.sqrt(mean_squared_error(y_test, y_pred_knn)))
print("R^2", r2_score(y_test, y_pred_knn))
print("accuracy", accuracy_score(y_test, y_round_knn))

mae 0.6690819886752539
rmse 0.8956923458145231
R^2 0.2583889231044737
accuracy 0.5140431992271065


In [None]:
# Model 5: a multi-layer perceptron with two hidden layers (64 and 32 units),
# initial learning rate 3e-3 and at most 200 training iterations.
mlp = MLPRegressor(
    hidden_layer_sizes=(64, 32),
    learning_rate_init=3e-3,
    max_iter=200,
    random_state=42,
    verbose=False,
)
mlp.fit(X_train_vec, y_train)

# Continuous predictions are scored with the regression metrics below.
y_pred_mlp = mlp.predict(X_test_vec)
# For the accuracy score the predictions are rounded to the nearest integer
# and clamped to the range [1, 5] (presumably the rating scale).
y_round_mlp = np.clip(np.rint(y_pred_mlp), 1, 5)

print("mae", mean_absolute_error(y_test, y_pred_mlp))
# mean_squared_error returns the MSE by default; take the square root so the
# value printed under the "rmse" label really is the root mean squared error.
print("rmse", np.sqrt(mean_squared_error(y_test, y_pred_mlp)))
print("R^2", r2_score(y_test, y_pred_mlp))
print("accuracy", accuracy_score(y_test, y_round_mlp))

mae 0.39198678869426506
rmse 0.6293491961557744
R^2 0.47891445395798093
accuracy 0.7773790628666069
