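Website Performance Analysis uygulamamız için, kanal, saat ve haftanın gününe göre oturum (Sessions) sayısını tahmin eden bir model eğitip `traffic_model.pkl` dosyasına kaydedeceğiz.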

In [1]:
import warnings
# Silence every warning for the rest of the session to keep cell output clean.
# NOTE(review): this also hides deprecation warnings from pandas/scikit-learn;
# consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd

In [3]:
# Load the analytics export. header=1 takes the column names from the file's
# second line, so the line above it is skipped.
df = pd.read_csv(
    "data-export.csv",
    header=1,
)

In [4]:
# Preview the first rows to check that the header row was picked up correctly.
df.head()

Unnamed: 0,Session primary channel group (Default channel group),Date + hour (YYYYMMDDHH),Users,Sessions,Engaged sessions,Average engagement time per session,Engaged sessions per user,Events per session,Engagement rate,Event count
0,Direct,2024041623,237,300,144,47.526667,0.607595,4.673333,0.48,1402
1,Organic Social,2024041719,208,267,132,32.097378,0.634615,4.29588,0.494382,1147
2,Direct,2024041723,188,233,115,39.939914,0.611702,4.587983,0.493562,1069
3,Organic Social,2024041718,187,256,125,32.160156,0.668449,4.078125,0.488281,1044
4,Organic Social,2024041720,175,221,112,46.918552,0.64,4.529412,0.506787,1001


In [5]:
# Show column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3182 entries, 0 to 3181
Data columns (total 10 columns):
 #   Column                                                 Non-Null Count  Dtype  
---  ------                                                 --------------  -----  
 0   Session primary channel group (Default channel group)  3182 non-null   object 
 1   Date + hour (YYYYMMDDHH)                               3182 non-null   int64  
 2   Users                                                  3182 non-null   int64  
 3   Sessions                                               3182 non-null   int64  
 4   Engaged sessions                                       3182 non-null   int64  
 5   Average engagement time per session                    3182 non-null   float64
 6   Engaged sessions per user                              3182 non-null   float64
 7   Events per session                                     3182 non-null   float64
 8   Engagement rate                                 

In [6]:
# Count missing values per column.
df.isna().sum()

Session primary channel group (Default channel group)    0
Date + hour (YYYYMMDDHH)                                 0
Users                                                    0
Sessions                                                 0
Engaged sessions                                         0
Average engagement time per session                      0
Engaged sessions per user                                0
Events per session                                       0
Engagement rate                                          0
Event count                                              0
dtype: int64

In [7]:
# Swap the verbose export headers for short identifiers.
# The assignment is positional: the order below must match the order of the
# ten columns in the loaded frame.
df.columns = [
    'Channel',
    'DateHour',
    'Users',
    'Sessions',
    'EngagedSessions',
    'AvgEngagementTime',
    'EngagedSessionsPerUser',
    'EventsPerSession',
    'EngagementRate',
    'EventCount',
]

In [8]:
# Date and time feature extraction

In [9]:
# Parse the YYYYMMDDHH stamps in DateHour into timestamps.
df['DateTime'] = pd.to_datetime(df['DateHour'], format='%Y%m%d%H')

In [10]:
# Hour of day taken from the parsed timestamp.
df['Hour'] = df['DateTime'].dt.hour

In [11]:
df['DayOfWeek'] = df['DateTime'].dt.dayofweek  # 0 = Monday, 6 = Sunday

In [12]:
# Day of the month taken from the parsed timestamp.
# NOTE(review): this column is not part of the feature set selected for X in this notebook.
df['DayOfMonth'] = df['DateTime'].dt.day

In [13]:
# Model preparation: select features and target

In [14]:
# Feature matrix: traffic channel plus the two time-of-week features.
X = df.loc[:, ['Channel', 'Hour', 'DayOfWeek']]

In [15]:
# Target: the Sessions count of each row.
y = df['Sessions']

In [16]:
# Train/test split

In [17]:
from sklearn.model_selection import train_test_split

In [18]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [19]:
# Pipeline (categorical encoding + model)

In [20]:
# Columns that must be one-hot encoded before reaching the regressor.
categorical_features = ['Channel']

In [21]:
# Columns that are already numeric and need no encoding.
numeric_features = ['Hour', 'DayOfWeek']

In [22]:
from sklearn.compose import ColumnTransformer

In [23]:
from sklearn.preprocessing import OneHotEncoder

In [24]:
# One-hot encode the categorical columns and forward the numeric columns
# unchanged. Both groups are listed explicitly, so the transformer selects its
# inputs by name and drops anything else instead of relying on whatever
# columns happen to be left over in X. The output column order (encoded
# categories first, then the numeric columns) is the same as with an implicit
# remainder passthrough.
# handle_unknown='ignore' encodes a category not seen during fit as all zeros
# rather than raising at prediction time.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
        ('num', 'passthrough', numeric_features),
    ],
    remainder='drop'
)

In [25]:
from sklearn.pipeline import Pipeline

In [26]:
from sklearn.ensemble import RandomForestRegressor

In [27]:
# Chain preprocessing and the regressor so the saved object accepts raw
# feature columns directly. The forest uses 100 trees and a fixed seed.
model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        (
            'regressor',
            RandomForestRegressor(n_estimators=100, random_state=42),
        ),
    ]
)

In [28]:
# Training

In [29]:
# Fit the preprocessing steps and the regressor on the training split only.
model.fit(X_train, y_train)

In [30]:
# Score on the training split (R² for a regressor). This is an in-sample
# figure; the held-out evaluation follows below.
model.score(X_train, y_train)

0.8958136675035101

In [31]:
# Evaluation on the held-out test split

In [32]:
# Predictions for the held-out test rows.
y_pred = model.predict(X_test)

In [33]:
from sklearn.metrics import mean_squared_error, r2_score

In [34]:
import numpy as np

In [35]:
# Root mean squared error on the test split, in the same units as Sessions.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

In [36]:
# Report the test RMSE rounded to two decimals.
print(f"Model RMSE: {rmse:.2f}")

Model RMSE: 18.41


In [37]:
# R² on the test split.
r2 = r2_score(y_test, y_pred)

In [38]:
# Report the test R² rounded to four decimals.
print(f"R2 Score: {r2:.4f}")

R2 Score: 0.7823


In [39]:
# Save the trained pipeline to disk

In [40]:
import joblib

In [41]:
# Persist the whole fitted pipeline (preprocessing + regressor) in one file.
# NOTE(review): a joblib pickle is generally tied to the library versions that
# produced it; confirm the consuming app uses a compatible scikit-learn version.
joblib.dump(model, 'traffic_model.pkl')

['traffic_model.pkl']

Eğitim verisinde R² skoru: 0.90; test verisinde R² skoru: 0.7823 (test RMSE: 18.41).