# Imports


In [84]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score, mean_squared_error, mean_absolute_error, r2_score

import statsmodels.api as sm

# Preprocessing


In [None]:
# Load the tennis dataset from disk and preview its first rows.
csv_path = r"data/prediction/tennis.csv"
data = pd.read_csv(csv_path)
data.head()

In [86]:
# Shared encoder instances used by the preprocessing cells that follow.
ohe = OneHotEncoder()
le = LabelEncoder()

In [None]:
# Integer-encode the outlook labels, then expand the codes into a dense
# one-hot matrix (one column per distinct code).
outlook_codes = le.fit_transform(data["outlook"])
outlook = ohe.fit_transform(outlook_codes.reshape(-1, 1)).toarray()
outlook

In [None]:
# Wrap the one-hot matrix in a DataFrame with correctly labelled columns.
# LabelEncoder assigns integer codes in sorted label order and OneHotEncoder
# emits one column per code in ascending order, so the column names must be
# the sorted category labels; a hand-written order mislabels the columns.
outlook_columns = sorted(data["outlook"].unique())
outlook_df = pd.DataFrame(data=outlook, columns=outlook_columns)
outlook_df

In [None]:
# Encode the "windy" column as integer labels, shaped as a single column.
windy_codes = le.fit_transform(data["windy"])
windy = windy_codes.reshape(-1, 1)
windy

In [None]:
# Wrap the encoded windy column in a one-column DataFrame.
windy_columns = ["windy"]
windy_df = pd.DataFrame(windy, columns=windy_columns)
windy_df

In [None]:
# Encode the "play" column as integer labels, shaped as a single column.
play = le.fit_transform(data["play"]).reshape(-1, 1)
play

In [None]:
# Wrap the encoded play column in a one-column DataFrame.
play_columns = ["play"]
play_df = pd.DataFrame(play, columns=play_columns)
play_df

In [None]:
# Keep the two numeric columns unchanged; they need no encoding.
numeric_columns = ["temperature", "humidity"]
temp_humi = data[numeric_columns]
temp_humi

In [None]:
# Assemble the modelling table by placing the prepared frames side by side:
# outlook one-hot columns, temperature/humidity, windy, play.
frames = [outlook_df, temp_humi, windy_df, play_df]
final_df = pd.concat(frames, axis="columns")
final_df

# Backward Elimination


In [None]:
# Backward elimination, step 1: regress humidity (column 4 of final_df) on
# every other column and inspect the OLS summary.
predictor_cols = [0, 1, 2, 3, 5, 6]
response_col = 4
X_l = final_df.iloc[:, predictor_cols].values
Y_l = final_df.iloc[:, response_col].values
model = sm.OLS(Y_l, X_l).fit()
model.summary()

In [None]:
# Backward elimination, step 2: refit the same regression with column 5
# (windy) removed from the predictors.
predictor_cols = [0, 1, 2, 3, 6]
response_col = 4
X_l = final_df.iloc[:, predictor_cols].values
Y_l = final_df.iloc[:, response_col].values
model = sm.OLS(Y_l, X_l).fit()
model.summary()

In [97]:
# final_df = final_df.drop(columns=["windy"])

# Model


In [None]:
# Train a linear regression that predicts `target` from all other columns of
# final_df, then report metrics suited to the kind of target.
target = "humidity"
# "play" and "windy" are scored as thresholded classifications; any other
# target is scored with regression metrics. The method label's spelling is
# kept as-is because it is compared against the same literal further down.
if target in ("play", "windy"):
    method = "binary"
else:
    method = "continous"
is_scaled = True

features = final_df.drop(columns=[target])
labels = final_df[target]
x_train, x_test, y_train, y_test = train_test_split(
    features, labels, test_size=.33, random_state=0
)

if is_scaled:
    # Fit the scaler on the training split only and reuse it for the test
    # split.
    sc = StandardScaler()
    X_train = sc.fit_transform(x_train)
    X_test = sc.transform(x_test)
else:
    X_train, X_test = x_train, x_test

lr = LinearRegression()
lr.fit(X_train, y_train)
pred = lr.predict(X_test)

if method == "binary":
    # Turn the continuous regression output into 0/1 labels at the threshold.
    threshold = .5
    pred_binary = (pred > threshold).astype(int)
    accuracy = accuracy_score(y_test, pred_binary)
    print(pred_binary == y_test)
    print(accuracy)
elif method == "continous":
    mse = mean_squared_error(y_test, pred)
    mae = mean_absolute_error(y_test, pred)
    r2 = r2_score(y_test, pred)
    print(f"Mean Squared Error (MSE): {mse:.2f}")
    print(f"Mean Absolute Error (MAE): {mae:.2f}")
    print(f"R^2 Score: {r2:.2f}")
    print()
    # Show the true values next to the rounded predictions for comparison.
    print(y_test.values)
    print(np.round(pred, 2))

### Model Performans Karşılaştırması

#### 1. Windy Özelliği Kullanılarak Elde Edilen Sonuçlar

-   **Mean Squared Error (MSE)**: 205.57
-   **Mean Absolute Error (MAE)**: 11.60
-   **R² Score**: -1.31

**Gerçek Değerler**: `[70, 65, 80, 90, 86]`

**Tahmin Edilen Değerler**: `[84.45, 63.94, 85.76, 64.21, 75.07]`

#### 2. Windy Özelliği Kullanılmadan Elde Edilen Sonuçlar

-   **Mean Squared Error (MSE)**: 110.07
-   **Mean Absolute Error (MAE)**: 7.99
-   **R² Score**: -0.24

**Gerçek Değerler**: `[70, 65, 80, 90, 86]`

**Tahmin Edilen Değerler**: `[75.41, 65.41, 83.91, 69.91, 75.90]`
