In [5]:
import pandas as pd
from datetime import datetime, timedelta
import os
from sklearn.model_selection import train_test_split
import statsmodels.api as sm

In [8]:
# Per-continent test of whether daily fuel prices are related to the daily
# mean of the traffic 'RESULT' column (reported as "average speed" in the
# messages printed below), using the overall F-test of a simple OLS fit.
directory_traffic = 'ibb_data_byDay_ANADOLU_AVRUPA'
directory_price = 'petrol_prices'
alpha = 0.05  # significance level for the regression F-test

# One 'YYYY-MM-DD' string per calendar day in the study window (both ends
# inclusive). Built once and vectorised instead of appending row by row
# inside the continent loop.
study_dates = pd.date_range("2020-01-01", "2022-12-31", freq="D").strftime("%Y-%m-%d")

for cont in ['AVRUPA', 'ANADOLU']:
    # Accumulator for the per-day sum of RESULT across all traffic files.
    temp_traffic_df = pd.DataFrame({'DATE': study_dates, 'RESULT': 0.0})

    sub_dir = os.path.join(directory_traffic, cont)
    df_price = pd.read_csv(os.path.join(directory_price, f'{cont}.csv'))

    # Only regular files are read: sub-directories (e.g. the
    # '.ipynb_checkpoints' folder Jupyter may create) would make read_csv
    # fail. Sorting makes the summation order deterministic.
    traffic_files = [
        name for name in sorted(os.listdir(sub_dir))
        if os.path.isfile(os.path.join(sub_dir, name))
    ]
    if not traffic_files:
        raise FileNotFoundError(f"No traffic files found in {sub_dir!r}")

    for file in traffic_files:
        df_traffic = pd.read_csv(os.path.join(sub_dir, file))
        # NOTE(review): the addition aligns on the row index, not on a date
        # column, so every file is assumed to hold one row per study day in
        # chronological order -- confirm against the data files.
        temp_traffic_df['RESULT'] += df_traffic['RESULT']

    # Mean over the files actually summed (previously hard-coded as 25 for
    # AVRUPA and 13 for ANADOLU, which breaks silently if files change).
    temp_traffic_df['RESULT'] = temp_traffic_df['RESULT'] / len(traffic_files)

    for price in ['GASOLINE_PRICE', 'DIESEL_PRICE']:
        # The first traffic day and the first and last price rows are
        # discarded, so the price file must hold exactly one row more than
        # the study window for the two samples to line up.
        # NOTE(review): the reason for this trimming is not visible here --
        # confirm it matches the intended date alignment.
        y = temp_traffic_df['RESULT'].iloc[1:]
        X = df_price[[price]].iloc[1:-1].to_numpy()

        if len(X) != len(y):
            raise ValueError(
                f"{cont}/{price}: {len(X)} price rows vs {len(y)} traffic rows "
                "after trimming; the series cannot be paired."
            )

        # The model is fitted on the full sample; the F-test below is an
        # in-sample significance test, so no train/test split is needed.
        X_with_constant = sm.add_constant(X)

        model = sm.OLS(y, X_with_constant)
        results = model.fit()

        # p-value of the overall F-test (H0: the price coefficient is zero).
        p_value = results.f_pvalue

        fuel = price[:-6].capitalize()  # strip the '_PRICE' suffix
        region = cont.capitalize()

        print("P-Value:", p_value)
        if p_value < alpha:
            print("Reject the null hypotesis!")
            print(f"There is a relation between {fuel} price and average speed in {region}.")
        else:
            # A large p-value does not prove the null hypothesis; it only
            # means the data give no significant evidence against it.
            print("Fail to reject the null hypothesis!")
            print(f"No statistically significant relation found between {fuel} price and average speed in {region}.")
        print('\n')

P-Value: 1.2668149717927922e-06
Reject the null hypotesis!
There is a relation between Gasoline price and average speed in Avrupa.


P-Value: 4.514771077171582e-06
Reject the null hypotesis!
There is a relation between Diesel price and average speed in Avrupa.


P-Value: 2.7192036590267928e-15
Reject the null hypotesis!
There is a relation between Gasoline price and average speed in Anadolu.


P-Value: 7.821532833678752e-15
Reject the null hypotesis!
There is a relation between Diesel price and average speed in Anadolu.


