## PART 6

In [25]:
import numpy as np
import pandas as pd
import time
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt
import seaborn as sns

In [26]:
# Load the dataset
day_df  = pd.read_csv('data/day.csv')
hour_df = pd.read_csv('data/hour.csv')

# Add source column to each dataset
# day_df['source'] = 'day'
# hour_df['source'] = 'hour'

# Merge the datasets
merged_df = pd.concat([day_df, hour_df], ignore_index=True)
merged_df = merged_df.drop(['dteday'], axis=1)

#TEST İÇİN 10 VERİYE İN
# merged_df = merged_df.head(10)

non_numeric_cols = merged_df.select_dtypes(exclude=['int64', 'float64']).columns
if len(non_numeric_cols) > 0:
      merged_df = pd.get_dummies(merged_df, columns=non_numeric_cols, drop_first=True)

merged_df = merged_df.dropna()

X = merged_df.drop('cnt', axis=1).to_numpy()
y = merged_df['cnt'].to_numpy().ravel()

scaler = StandardScaler()
X      = scaler.fit_transform(X)

X.shape, y.shape


((17379, 15), (17379,))

In [27]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor, export_text
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, r2_score
import time

start_time = time.time()
dt_regressor = DecisionTreeRegressor(random_state=42, max_depth=5)

# k-Fold 
kf = KFold(n_splits=5, shuffle=True, random_state=42)
mse_scores = []
r2_scores = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    dt_regressor.fit(X_train, y_train)
    y_pred = dt_regressor.predict(X_test)
    
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    mse_scores.append(mse)
    r2_scores.append(r2)

# Print performance
print(f"Average Mean Squared Error (MSE): {np.mean(mse_scores):.2f}")
print(f"Average R² Score: {np.mean(r2_scores):.2f}")

def extract_rules(decision_tree, feature_names):
    tree_rules = export_text(decision_tree, feature_names=feature_names)
    return tree_rules

final_dt_regressor = DecisionTreeRegressor(random_state=42, max_depth=5)
final_dt_regressor.fit(X, y)
feature_names = [f"Feature_{i}" for i in range(X.shape[1])]
rules = extract_rules(final_dt_regressor, feature_names)

print("Extracted Rules:")
print(rules)

end_time = time.time()
print(f"Total Runtime: {end_time - start_time:.2f} seconds")

Average Mean Squared Error (MSE): 517.77
Average R² Score: 0.98
Extracted Rules:
|--- Feature_13 <= 0.31
|   |--- Feature_13 <= -0.46
|   |   |--- Feature_13 <= -0.77
|   |   |   |--- Feature_13 <= -0.91
|   |   |   |   |--- Feature_13 <= -0.97
|   |   |   |   |   |--- value: [5.13]
|   |   |   |   |--- Feature_13 >  -0.97
|   |   |   |   |   |--- value: [13.55]
|   |   |   |--- Feature_13 >  -0.91
|   |   |   |   |--- Feature_13 <= -0.85
|   |   |   |   |   |--- value: [24.69]
|   |   |   |   |--- Feature_13 >  -0.85
|   |   |   |   |   |--- value: [36.63]
|   |   |--- Feature_13 >  -0.77
|   |   |   |--- Feature_13 <= -0.61
|   |   |   |   |--- Feature_12 <= -0.43
|   |   |   |   |   |--- value: [54.72]
|   |   |   |   |--- Feature_12 >  -0.43
|   |   |   |   |   |--- value: [74.10]
|   |   |   |--- Feature_13 >  -0.61
|   |   |   |   |--- Feature_12 <= -0.29
|   |   |   |   |   |--- value: [81.82]
|   |   |   |   |--- Feature_12 >  -0.29
|   |   |   |   |   |--- value: [108.05]
|   

```markdown
#### Results
The results indicate that the Decision Tree Regressor achieved a high average R² score of 0.98, demonstrating strong predictive performance. The extracted rules provide insights into the decision-making process of the model, while the average Mean Squared Error (MSE) of 519.81 suggests a relatively low prediction error. Overall, the model performs well on the given dataset.
```