In [None]:
import itertools
import warnings

import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.statespace.sarimax import SARIMAX

In [None]:
# Load the raw dataset; later cells read its 'country', 'year' and
# 'nuclear_electricity' columns.
df = pd.read_csv("C:/Users/Yerko/Desktop/datasets/Owid.csv")

# Preview the first rows. head must be *called*: passing the bound method
# itself to print() dumps the method repr (and the whole frame) instead.
df.head()

In [None]:
# Countries in this list are left out of every regional bucket built below.
cop_28 = [
    "United States", "Armenia", "Bulgaria", "Canada", "Croatia", "Czechia",
    "Finland", "France", "Ghana", "Hungary", "Jamaica", "Japan", "South Korea",
    "Moldova", "Mongolia", "Morocco", "Netherlands", "Poland", "Romania", "Slovakia",
    "Slovenia", "Sweden", "Ukraine", "United Arab Emirates", "United Kingdom"
]

# Candidate members per region, listed in matching priority order: a name that
# occurs in more than one list (e.g. 'Cyprus') belongs to the first region
# that mentions it.
region_members = [
    ('Africa', [
        'Algeria', 'Angola', 'Benin', 'Botswana', 'Burkina Faso', 'Burundi', 'Cameroon', 'Cape Verde',
        'Central African Republic', 'Chad', 'Comoros', 'Congo', 'Djibouti', 'Egypt', 'Equatorial Guinea',
        'Eritrea', 'Eswatini', 'Ethiopia', 'Gabon', 'Gambia', 'Ghana', 'Guinea', 'Guinea-Bissau', 'Ivory Coast',
        'Kenya', 'Lesotho', 'Liberia', 'Libya', 'Madagascar', 'Malawi', 'Mali', 'Mauritania', 'Mauritius',
        'Morocco', 'Mozambique', 'Namibia', 'Niger', 'Nigeria', 'Rwanda', 'Sao Tome and Principe', 'Senegal',
        'Seychelles', 'Sierra Leone', 'Somalia', 'South Africa', 'South Sudan', 'Sudan', 'Tanzania', 'Togo',
        'Tunisia', 'Uganda', 'Zambia', 'Zimbabwe',
    ]),
    ('Asia', [
        'Afghanistan', 'Armenia', 'Azerbaijan', 'Bahrain', 'Bangladesh', 'Bhutan', 'Brunei', 'Cambodia',
        'China', 'Cyprus', 'Georgia', 'India', 'Indonesia', 'Iran', 'Iraq', 'Israel', 'Japan', 'Jordan',
        'Kazakhstan', 'Kuwait', 'Kyrgyzstan', 'Laos', 'Lebanon', 'Malaysia', 'Maldives', 'Mongolia',
        'Myanmar', 'Nepal', 'North Korea', 'Oman', 'Pakistan', 'Palestine', 'Philippines', 'Qatar', 'Saudi Arabia',
        'Singapore', 'South Korea', 'Sri Lanka', 'Syria', 'Taiwan', 'Tajikistan', 'Thailand', 'Timor-Leste',
        'Turkey', 'Turkmenistan', 'United Arab Emirates', 'Uzbekistan', 'Vietnam', 'Yemen',
    ]),
    ('North America', [
        'Canada', 'United States', 'Mexico', 'Greenland', 'Bermuda',
    ]),
    ('South America', [
        'Argentina', 'Bolivia', 'Brazil', 'Chile', 'Colombia', 'Ecuador', 'Guyana', 'Paraguay', 'Peru',
        'Suriname', 'Uruguay', 'Venezuela',
    ]),
    ('East Europe', [
        'Albania', 'Bosnia and Herzegovina', 'Bulgaria', 'Croatia', 'Czechia', 'Estonia', 'Hungary',
        'Kosovo', 'Latvia', 'Lithuania', 'Macedonia', 'Moldova', 'Montenegro', 'Poland', 'Romania',
        'Serbia', 'Slovakia', 'Slovenia', 'Ukraine', 'Belarus',
    ]),
    ('West Europe', [
        'Austria', 'Belgium', 'Cyprus', 'Denmark', 'Finland', 'France', 'Germany', 'Greece', 'Iceland',
        'Ireland', 'Italy', 'Luxembourg', 'Malta', 'Netherlands', 'Norway', 'Portugal', 'Spain', 'Sweden',
        'Switzerland', 'United Kingdom',
    ]),
]

# Flatten the lists into one country -> region lookup. setdefault keeps the
# first region seen for a name, which reproduces first-match-wins semantics.
country_to_region = {}
for region_name, members in region_members:
    for member in members:
        country_to_region.setdefault(member, region_name)

# Output buckets. 'Russia' is seeded by hand: it appears in none of the
# candidate lists above, so the loop below never adds to it.
regions = {
    'Africa': [],
    'Asia': [],
    'Russia': ['Russia'],
    'North America': [],
    'South America': [],
    'East Europe': [],
    'West Europe': []
}

# Walk the countries present in the data, skip the excluded ones, and file
# each remaining country under its region (unlisted countries are ignored).
for country in df['country'].unique():
    if country in cop_28:
        continue
    assigned_region = country_to_region.get(country)
    if assigned_region is not None:
        regions[assigned_region].append(country)

In [None]:
# Keep only the rows whose 'year' is 1985 or later.
from_1985_onwards = df['year'] >= 1985
filtered_data = df.loc[from_1985_onwards]

# Empty placeholder; the aggregation step that follows fills it with one
# column per region.
region_electricity = pd.DataFrame()

In [None]:
# One yearly series per region: the sum of 'nuclear_electricity' over the
# region's countries, named after the region so it becomes the column label.
# NOTE(review): groupby(...).sum() skips NaN, so a year in which every country
# is missing comes out as 0 rather than NaN (and survives the dropna below);
# pass min_count=1 to sum() if such gaps should stay missing.
regional_series = [
    filtered_data[filtered_data['country'].isin(countries)]
    .groupby('year')['nuclear_electricity']
    .sum()
    .rename(region)
    for region, countries in regions.items()
]

# Align all regions on the union of years in a single concat. The previous
# incremental join relied on `region_electricity.empty` to detect the first
# iteration, which silently discarded the column of any leading region that
# had no rows. sort_index() keeps the years in chronological order.
region_electricity = pd.concat(regional_series, axis=1).sort_index()

# Series analysed in the rest of the notebook; dropna removes years for which
# this region has no value after the outer alignment.
electricity = region_electricity['Asia'].dropna()

# Augmented Dickey-Fuller unit-root test on the regional series.
adf_test = adfuller(electricity)
adf_statistic, adf_p_value, adf_used_lags, adf_n_obs, adf_critical_values = adf_test[:5]

adf_result = {
    'ADF Statistic': adf_statistic,
    'p-value': adf_p_value,
    'Number of Lags Used': adf_used_lags,
    'Number of Observations Used': adf_n_obs,
    'Critical Values': adf_critical_values,
}

adf_result

In [None]:
def optimize_sarimax(series, p_range, d_range, q_range, seasonal_period,
                     seasonal_range=range(2)):
    """Grid-search SARIMAX orders and keep the fit with the lowest AIC.

    Every combination of (p, d, q) from the given ranges and (P, D, Q) from
    ``seasonal_range`` is fitted with ``enforce_stationarity=False`` and
    ``enforce_invertibility=False``. Candidates are visited with p varying
    slowest and Q fastest; on an AIC tie the first candidate visited wins.

    Parameters
    ----------
    series : array-like
        Observations passed unchanged to ``SARIMAX``.
    p_range, d_range, q_range : iterable of int
        Candidate non-seasonal AR, differencing and MA orders.
    seasonal_period : int
        Season length (the fourth element of ``seasonal_order``).
    seasonal_range : iterable of int, optional
        Candidate values used for each of the seasonal P, D and Q orders.
        Defaults to ``range(2)``.

    Returns
    -------
    tuple
        ``(best_aic, best_order, best_seasonal_order, best_results)``. If no
        candidate could be fitted (or the grid is empty) this is
        ``(np.inf, None, None, None)``.

    Warns
    -----
    RuntimeWarning
        Emitted once after the search if any candidate raised while being
        built or fitted; those candidates are skipped.
    """
    best_aic = np.inf
    best_order = None
    best_seasonal_order = None
    best_mdl = None
    failed_fits = 0

    # Materialise once so the same values can be reused for P, D and Q even
    # when the caller hands in a one-shot iterator.
    seasonal_values = tuple(seasonal_range)

    # product() iterates with the last factor fastest, i.e. in the same order
    # as six nested loops over p, d, q, P, D, Q.
    candidates = itertools.product(p_range, d_range, q_range,
                                   seasonal_values, seasonal_values, seasonal_values)

    for p, d, q, P_, D_, Q_ in candidates:
        order = (p, d, q)
        seasonal_order = (P_, D_, Q_, seasonal_period)
        try:
            temp_model = SARIMAX(series, order=order,
                                 seasonal_order=seasonal_order,
                                 enforce_stationarity=False,
                                 enforce_invertibility=False)
            results = temp_model.fit(disp=False)
            aic = results.aic
        except Exception:
            # Best-effort search: a candidate that cannot be fitted is skipped.
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt / SystemExit still stop a long-running search.
            failed_fits += 1
            continue

        if aic < best_aic:
            best_aic = aic
            best_order = order
            best_seasonal_order = seasonal_order
            best_mdl = results

    if failed_fits:
        warnings.warn(
            f"{failed_fits} SARIMAX candidate fit(s) raised and were skipped",
            RuntimeWarning,
            stacklevel=2,
        )

    return best_aic, best_order, best_seasonal_order, best_mdl

# Candidate non-seasonal orders: each of p, d and q is tried at 0 and 1.
p_range = range(0, 2)
d_range = range(0, 2)
q_range = range(0, 2)

# NOTE(review): the series is aggregated per 'year', so a seasonal period of
# 12 means a 12-year cycle, not a monthly season -- confirm this is intended.
seasonal_period = 12

best_aic, best_order, best_seasonal_order, best_model = optimize_sarimax(
    electricity.dropna(), p_range, d_range, q_range, seasonal_period
)

# optimize_sarimax returns None as the model when no candidate could be
# fitted; fail with an explicit message instead of an AttributeError on
# `None.summary()`.
if best_model is None:
    raise RuntimeError("No SARIMAX candidate could be fitted to the series")

(best_aic, best_order, best_seasonal_order, best_model.summary())

In [None]:
# Forecast the fitted model from the year after the last observation through
# 2050 and write the summary frame (with 95% intervals) to CSV.
try:
    # The series is indexed by year, so its last label is the last observed year.
    last_data_year = int(electricity.index[-1])
    forecast_end_year = 2050
    steps_to_2050 = forecast_end_year - last_data_year

    if steps_to_2050 <= 0:
        raise ValueError(
            f"Nothing to forecast: data already reaches {last_data_year}"
        )

    # Plain integer years, last_data_year + 1 .. 2050 inclusive. This replaces
    # a date_range round-trip that relied on the deprecated 'A' frequency alias
    # and an end date of 2051 to make 2050 inclusive.
    forecast_years = pd.RangeIndex(last_data_year + 1, forecast_end_year + 1)

    correct_forecast_to_2050 = best_model.get_forecast(steps=steps_to_2050)
    # alpha=0.05 requests 95% confidence bounds.
    correct_forecast_values_2050 = correct_forecast_to_2050.summary_frame(alpha=0.05)

    # Relabel the forecast rows with calendar years.
    correct_forecast_values_2050.index = forecast_years
    correct_forecast_values_2050.index.name = 'Year'

    output = correct_forecast_values_2050

    # NOTE(review): the file name says "World" but `electricity` is the 'Asia'
    # column of region_electricity -- confirm the intended name.
    path_to_save_csv = "C:/Users/Yerko/Desktop/results/Nuclear_Electricity_World_Forecasts.csv"

    output.to_csv(path_to_save_csv)
    print("Data saved successfully to", path_to_save_csv)

except Exception as e:
    # Best-effort export: report the problem instead of aborting the notebook.
    print("An error occurred:", str(e))
