In [2]:
import pandas as pd
from sklearn.metrics import mean_squared_error, accuracy_score

In [4]:

# Location of the predictions CSV; put the path to your own CSV file here.
file_path = '/content/results_20_epoch.csv'
df = pd.read_csv(file_path)
df

Unnamed: 0,instrument,datetime,True,Predictions
0,SH600000,2020-06-17,-0.180130,0.248004
1,SH600004,2020-06-17,-1.089625,0.320068
2,SH600009,2020-06-17,-0.287761,0.152760
3,SH600011,2020-06-17,-0.627949,-0.006375
4,SH600015,2020-06-17,-0.262454,0.015099
...,...,...,...,...
175486,SZ300896,2022-12-30,0.996180,0.094017
175487,SZ300919,2022-12-30,0.736723,-0.242000
175488,SZ300957,2022-12-30,-0.895194,-0.029257
175489,SZ300979,2022-12-30,1.781169,0.172603


In [5]:
# Replace the header positionally with the column names that the metric
# helpers further down read ('y_actual' and 'raw_forecast').
df.columns = ['instrument', 'datetime', 'y_actual', 'raw_forecast']

# Find all companies: the distinct values of the 'instrument' column.
companies = pd.unique(df['instrument'])

In [7]:
# Show the distinct values taken from df['instrument'].
companies

array(['SH600000', 'SH600004', 'SH600009', 'SH600011', 'SH600015',
       'SH600016', 'SH600018', 'SH600019', 'SH600023', 'SH600025',
       'SH600027', 'SH600028', 'SH600029', 'SH600030', 'SH600031',
       'SH600036', 'SH600038', 'SH600048', 'SH600050', 'SH600061',
       'SH600066', 'SH600068', 'SH600085', 'SH600089', 'SH600100',
       'SH600104', 'SH600109', 'SH600111', 'SH600115', 'SH600118',
       'SH600153', 'SH600170', 'SH600176', 'SH600177', 'SH600183',
       'SH600188', 'SH600196', 'SH600208', 'SH600219', 'SH600221',
       'SH600271', 'SH600276', 'SH600297', 'SH600299', 'SH600309',
       'SH600332', 'SH600340', 'SH600346', 'SH600352', 'SH600362',
       'SH600369', 'SH600372', 'SH600383', 'SH600390', 'SH600398',
       'SH600406', 'SH600436', 'SH600482', 'SH600487', 'SH600489',
       'SH600498', 'SH600516', 'SH600519', 'SH600522', 'SH600535',
       'SH600547', 'SH600566', 'SH600570', 'SH600583', 'SH600585',
       'SH600588', 'SH600606', 'SH600637', 'SH600655', 'SH6006

In [6]:
# Display the frame again to check the renamed columns.
df

Unnamed: 0,instrument,datetime,y_actual,raw_forecast
0,SH600000,2020-06-17,-0.180130,0.248004
1,SH600004,2020-06-17,-1.089625,0.320068
2,SH600009,2020-06-17,-0.287761,0.152760
3,SH600011,2020-06-17,-0.627949,-0.006375
4,SH600015,2020-06-17,-0.262454,0.015099
...,...,...,...,...
175486,SZ300896,2022-12-30,0.996180,0.094017
175487,SZ300919,2022-12-30,0.736723,-0.242000
175488,SZ300957,2022-12-30,-0.895194,-0.029257
175489,SZ300979,2022-12-30,1.781169,0.172603


In [17]:
def calculate_mse(context_results_df):
    """Return the mean squared error between the actual and forecast columns.

    Args:
        context_results_df: Frame providing the ``y_actual`` and
            ``raw_forecast`` columns.

    Returns:
        Whatever ``mean_squared_error`` returns when ``y_actual`` is passed
        as the first argument and ``raw_forecast`` as the second.
    """
    observed = context_results_df['y_actual']
    forecast = context_results_df['raw_forecast']
    return mean_squared_error(observed, forecast)

def calculate_accuracy(context_results_df):
    """Return the share of rows whose forecast moves in the same direction as the actual value.

    For every row after the first, both the actual value and the forecast are
    compared against the *previous row's actual value*. A row counts as
    correct when both differences are strictly positive or both are strictly
    negative; a zero difference on either side counts as a miss.

    Rows are compared positionally, so the caller must pass them in the order
    in which "previous" is meaningful (the index is not consulted).

    Args:
        context_results_df: Frame providing the ``y_actual`` and
            ``raw_forecast`` columns.

    Returns:
        ``correct / (number_of_rows - 1)`` as a float, or ``0.0`` when the
        frame has fewer than two rows and no comparison is possible.
    """
    actual = context_results_df['y_actual'].to_numpy()
    forecast = context_results_df['raw_forecast'].to_numpy()

    # The first row has no predecessor, so it is never scored.
    total_predictions = len(actual) - 1
    if total_predictions <= 0:
        return 0.0

    # Vectorised form of the row-by-row comparison: slice once instead of
    # doing three scalar lookups per row.
    previous_actual = actual[:-1]
    actual_movement = actual[1:] - previous_actual
    predicted_movement = forecast[1:] - previous_actual

    both_up = (actual_movement > 0) & (predicted_movement > 0)
    both_down = (actual_movement < 0) & (predicted_movement < 0)
    correct_predictions = int((both_up | both_down).sum())

    return correct_predictions / total_predictions



def calculate_results_for_context_len(results_by_context_len):
    """Summarise one results frame as a single-row frame of metrics.

    Despite the name, no filtering by context length is done here: the frame
    is handed unchanged to ``calculate_mse`` and ``calculate_accuracy``.

    Args:
        results_by_context_len: Frame accepted by both metric helpers.

    Returns:
        A one-row ``pd.DataFrame`` with the columns ``mse`` and ``accuracy``.
    """
    # Build the record of computed metrics (MSE is evaluated first).
    metrics = {
        'mse': calculate_mse(results_by_context_len),
        'accuracy': calculate_accuracy(results_by_context_len),
    }
    return pd.DataFrame([metrics])

In [18]:
# Collects the single-row result frames appended by the per-company loop.
final_results = []


In [19]:
# Start from an empty list on every run, so that re-executing this cell does
# not append a second copy of every company's results.
final_results = []

for company in companies:
    # Rows of this company only, indexed by parsed date. calculate_accuracy
    # compares each row with the one before it, so the rows are put in
    # chronological order (mergesort is stable, keeping same-day rows in their
    # original relative order).
    company_data = (
        df[df['instrument'] == company]
        .assign(datetime=lambda frame: pd.to_datetime(frame['datetime']))
        .set_index('datetime')
        .sort_index(kind='mergesort')
    )

    # Compute the metrics for this company and tag the row with its code.
    company_result = calculate_results_for_context_len(company_data)
    company_result['company'] = company
    final_results.append(company_result)

# Combine all per-company rows into one frame with a fresh 0..n-1 index.
final_results_df = pd.concat(final_results).reset_index(drop=True)

# Show the results.
print(final_results_df)

          mse  accuracy   company
0    0.299650  0.619741  SH600000
1    0.915570  0.618257  SH600004
2    1.031959  0.589381  SH600009
3    1.236823  0.636917  SH600011
4    0.282862  0.632686  SH600015
..        ...       ...       ...
416  2.242578  0.669903  SZ300763
417  1.353759  0.672727  SZ300957
418  1.121411  0.655172  SZ300979
419  1.390052  0.553571  SH688065
420  1.573032  0.666667  SZ300919

[421 rows x 3 columns]


In [20]:
# Row labels of the smallest MSE and of the largest accuracy.
lowest_mse_label = final_results_df['mse'].idxmin()
highest_accuracy_label = final_results_df['accuracy'].idxmax()

best_mse_company = final_results_df.loc[lowest_mse_label]
best_accuracy_company = final_results_df.loc[highest_accuracy_label]

# Show the results.
print("Best company based on MSE:")
print(best_mse_company)

print("\nBest company based on Accuracy:")
print(best_accuracy_company)





Best company based on MSE:
mse         0.110684
accuracy    0.666667
company     SZ002081
Name: 227, dtype: object

Best company based on Accuracy:
mse         0.704169
accuracy    0.833333
company     SH600566
Name: 66, dtype: object
