## Plotting using Python

In [None]:
import plotly.express as px
import pandas as pd
import json
import plotly.graph_objects as go
from wordcloud import WordCloud
import requests
import matplotlib.dates as mdates
import pandas as pd
import numpy as np
from pmdarima.arima import auto_arima
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

%config InlineBackend.figure_format ='retina'

import warnings
warnings.filterwarnings('ignore')

# Wordcloud of Symptoms for monkeypox cases

In [None]:
# Load the worldwide case-detection timeline.
df = pd.read_csv('E:/Phd_dataset/MonkeyPox/Worldwide_Case_Detection_Timeline.csv')
# dropna returns a new frame, so the result has to be assigned for the
# rows without a 'Symptoms' value to actually be removed.
df = df.dropna(subset=['Symptoms'])
df.shape

In [None]:
from collections import Counter
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import pandas as pd

def wordcloud(text):
    """Draw a word cloud of symptom words and return the ten most frequent ones.

    Parameters
    ----------
    text : pandas.Series
        Free-text symptom descriptions (string values).

    Returns
    -------
    pandas.DataFrame
        Columns ``Symptom`` and ``Frequency`` for the ten most common words.

    Side effects
    ------------
    Saves the figure as "Wordcloud of Monkeypox Symptoms.pdf" in the working
    directory and displays it.
    """
    # Fold spelling variants into one token, then strip commas once.
    text = (
        text.str.replace(r"\b(ulcer|ulcerative)\b", "ulcers", regex=True)
        .str.replace(r"\b(Rash)\b", "rash", regex=True)
        .str.replace(",", "")
    )
    # One cell per word; value_counts ignores the NaN padding from expand=True.
    words = text.str.split(expand=True).unstack().value_counts()
    stopwords = ["lesions", "and"]
    words = words.drop(labels=stopwords, errors="ignore")
    # Name both columns explicitly: the default column names produced by
    # value_counts().reset_index() differ between pandas versions.
    top10 = words.head(10).rename_axis("Symptom").reset_index(name="Frequency")
    wc = WordCloud(width=1200, height=800, background_color="white").generate_from_frequencies(words)
    plt.figure(figsize=(12, 10), dpi =700)
    plt.imshow(wc, interpolation="bilinear")
    plt.title("", fontsize=18,fontweight="bold", pad = 15)
    plt.axis("off")
    plt.savefig("Wordcloud of Monkeypox Symptoms.pdf", format = 'pdf')
    plt.show()
    return top10


In [None]:
# Draw the word cloud for the Symptoms column and write the returned top-10 table to Symptoms.csv.
wordcloud(df["Symptoms"]).to_csv("Symptoms.csv")

# Worldwide plot using plotly

In [None]:
# Daily confirmed cases per country: keep the first ten rows and index them by country.
df = (
    pd.read_csv('E:/Phd_dataset/MonkeyPox/Daily_Country_Wise_Confirmed_Cases.csv', parse_dates=True)
    .iloc[0:10]
    .set_index('Country')
)
df.head()

In [None]:
import plotly.io as pio
import plotly.express as px

# Use the plain white template for figures created from here on.
pio.templates.default = "simple_white"

# One colour per line: the first ten entries of Plotly's qualitative palette.
colors = px.colors.qualitative.Plotly[:10]

# Both axes share the same title spacing and title font.
axis_title_style = dict(
    title_standoff=25,
    title_font=dict(size=14, color="black"),
)

# Transposed frame: one line per column, coloured by the 'Country' variable.
fig = px.line(
    df.T,
    width=1200,
    height=600,
    color='Country',
    color_discrete_sequence=colors,
)
fig.update_layout(
    xaxis_title="Date",
    yaxis_title="MonkeyPox Cases",
    title="",
    title_x=0.5,
    title_y=0.9,
    xaxis_nticks=5,
    legend_title="Country",
    xaxis=axis_title_style,
    yaxis=axis_title_style,
)
fig.show()

# Export the figure as a PDF (scale factor 2).
fig.write_image("MonkeyPox Cases Worldwide for Top 10 Countries.pdf", scale=2)


In [None]:
import plotly.io as pio
import plotly.express as px

# Use the plain white template for figures created from here on.
pio.templates.default = "simple_white"

# Fixed colour for each country name.
colors = {
    "United States": "blue",
    "Brazil": "red",
    "Spain": "green",
    "France": "purple",
    "Germany": "orange",
    "England": "pink",
    "Peru": "brown",
    "Colombia": "gray",
    "Canada": "cyan",
    "Mexico": "magenta",
}

# Both axes share the same title spacing and title font.
axis_title_style = dict(
    title_standoff=25,
    title_font=dict(size=14, color="black"),
)

# Transposed frame: one line per column, coloured through the explicit map above.
fig = px.line(
    df.T,
    width=1200,
    height=600,
    color='Country',
    color_discrete_map=colors,
)
fig.update_layout(
    xaxis_title="Date",
    yaxis_title="MonkeyPox Cases",
    title="",
    title_x=0.5,
    title_y=0.9,
    xaxis_nticks=5,
    legend_title="Country",
    xaxis=axis_title_style,
    yaxis=axis_title_style,
)
fig.show()

# Export the figure once as PDF and once as PNG, both at scale factor 2.
for extension in ("pdf", "png"):
    fig.write_image(f"MonkeyPox Cases Worldwide for Top 10 Countries.{extension}", scale=2)

In [None]:
# Write the current figure to a PNG file at scale factor 2.
fig.write_image("MonkeyPox Cases Worldwide for Top 10 Countries.png", scale=2)

# Choropleth

In [None]:
#!pip install geopy

In [None]:
# Keep the 100 rows with the most confirmed cases, reduced to the two columns the map needs.
df = (
    pd.read_csv('E:/Phd_dataset/MonkeyPox/Monkey_Pox_Cases_Worldwide1.csv')
    .sort_values(by='Confirmed_Cases', ascending=False)
    .iloc[0:100][['Country', 'Confirmed_Cases']]
)

In [None]:
# Number of non-null entries in the Country column.
df['Country'].count()

In [None]:
# Download the world-countries GeoJSON used for the choropleth.
geojson_url = 'https://raw.githubusercontent.com/python-visualization/folium/master/examples/data/world-countries.json'
# A timeout keeps the cell from hanging on a stalled connection.
response = requests.get(geojson_url, timeout=30)
# Fail with the HTTP error itself rather than a JSON decoding error on an error page.
response.raise_for_status()
geojson = response.json()

# Inspect one feature to see the structure.
geojson['features'][1]

In [None]:
# Map each feature's country name to its feature id, and copy the name
# onto the feature itself under the 'name' key.
state_id_map = {}
for feature in geojson['features']:
    country_name = feature['properties']['name']
    feature['name'] = country_name
    state_id_map[country_name] = feature['id']

In [None]:
# Display the name -> feature id mapping.
state_id_map

In [None]:
# Display the properties of the second GeoJSON feature.
geojson['features'][1]['properties']

In [None]:
# Distinct country names present in the case data.
df['Country'].unique()

In [None]:
# Countries that exist in the GeoJSON but have no row in the case data,
# together with their GeoJSON feature id.
names_without_cases = set(state_id_map) - set(df['Country'].unique())
missing_countries = pd.DataFrame({'Country': list(names_without_cases)})
missing_countries['name'] = missing_countries['Country'].apply(lambda country: state_id_map[country])

In [None]:
# Look up the GeoJSON feature id of every country; a country name that is
# missing from state_id_map raises KeyError.
df['name'] = df['Country'].apply(state_id_map.__getitem__)
df

In [None]:
# World choropleth of confirmed cases, matched to the GeoJSON features
# through the feature id stored in the 'name' column.
# No `labels` argument is passed: Plotly Express expects a dict mapping
# column names to display labels there, not a bare column name.
fig = px.choropleth(
    df,
    geojson=geojson,
    locations='name',
    locationmode='geojson-id',
    color='Confirmed_Cases',
    color_continuous_scale=['yellow', 'red', 'blue', 'magenta', 'cyan', 'brown'],
    hover_name='Country',
    title='',
    scope='world',
    fitbounds='locations',
)
fig.update_layout(title='',
                  title_x = 0.5,
                  title_y = 0.9)

fig.update_geos(visible=True)
import plotly.offline as pyo
pyo.init_notebook_mode(connected=True)
pyo.iplot(fig)

In [None]:
import plotly.io as pio
# Export the choropleth as PNG first, then as PDF.
for extension in ("png", "pdf"):
    pio.write_image(fig, f'Heatmap of MonkeyPox Infected cases Worlwid.{extension}')

# AutoArima

In [None]:
# Daily confirmed cases for every country in the file, indexed by country.
df = pd.read_csv('E:/Phd_dataset/MonkeyPox/Daily_Country_Wise_Confirmed_Cases.csv').set_index('Country')
df.head()

In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from pmdarima.arima import auto_arima
import pmdarima as pm
from sklearn.metrics import mean_squared_error

import warnings
warnings.filterwarnings('ignore')

def fit_and_predict_ARIMA(data, train_test_split = 0.2, n_forecast = 28):
    """Fit a non-seasonal auto-ARIMA model per row of ``data`` and score it.

    Each row of ``data`` is treated as one time series (row label = country).
    The trailing ``train_test_split`` fraction of the series is held out, the
    model is fitted on the remainder and ``n_forecast`` steps are predicted.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per country; every value must be convertible to float.
    train_test_split : float
        Fraction of each series held out as the test tail.
    n_forecast : int
        Number of steps to predict after the training part.

    Returns
    -------
    pandas.DataFrame
        One row per country with columns Country, AIC, BIC, Model, RMSE.
        RMSE is NaN when there is nothing to compare the forecast with.
    """
    results = []
    for country in data.index:
        ts = data.loc[country].astype(float)
        # Explicit split position: slicing with ``[:-0]`` would yield an empty
        # training set whenever the hold-out size rounds down to zero.
        split_at = len(ts) - int(len(ts) * train_test_split)
        train, test = ts.iloc[:split_at], ts.iloc[split_at:]
        model = auto_arima(train, seasonal=False, suppress_warnings=True, error_action="ignore", trace=False)
        future_forecast = np.asarray(model.predict(n_periods=n_forecast))
        # Score only the steps for which both a forecast and an observation
        # exist; the two lengths differ unless n_forecast equals len(test).
        horizon = min(len(test), len(future_forecast))
        if horizon:
            rmse = np.sqrt(mean_squared_error(test.iloc[:horizon], future_forecast[:horizon]))
        else:
            rmse = np.nan
        results.append({
            "Country": country,
            "AIC": model.aic(),
            "BIC": model.bic(),
            "Model": model,
            "RMSE": rmse
        })
    return pd.DataFrame(results)

# Fit one ARIMA model per row of df and collect the metrics.
arima_results = fit_and_predict_ARIMA(df)


# Display the metrics table.
arima_results

In [None]:
# Write the ARIMA metrics table to disk (the row index is included in the file).
arima_results.to_csv("arima_results.csv")

# AutoArima top 10

In [None]:
# Reload the daily case counts and keep the first ten rows, indexed by country.
# parse_dates=[0] is not passed: column 0 appears to be the Country column
# (TODO confirm against the CSV), which holds names rather than dates.
df = pd.read_csv('E:/Phd_dataset/MonkeyPox/Daily_Country_Wise_Confirmed_Cases.csv', header=0)
df = df.iloc[:10]
df.set_index('Country', inplace=True)
df

In [None]:
import pandas as pd
import numpy as np
from pmdarima.arima import auto_arima
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

def fit_and_predict_ARIMA(data, train_test_split = 0.2, n_forecast = 28):
    """Fit, score and plot a non-seasonal auto-ARIMA model per row of ``data``.

    Each row of ``data`` is one time series (row label = country). The trailing
    ``train_test_split`` fraction is held out, the model is fitted on the rest
    and ``n_forecast`` steps are predicted; negative predictions are clipped
    to zero. Series are drawn on a fixed 5 x 2 grid, so at most ten rows can
    be plotted.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per country; every value must be convertible to float.
    train_test_split : float
        Fraction of each series held out as the test tail.
    n_forecast : int
        Number of steps to predict after the training part.

    Returns
    -------
    pandas.DataFrame
        One row per country with columns Country, AIC, BIC, Model, RMSE.
        RMSE is NaN when there is nothing to compare the forecast with.

    Side effects
    ------------
    Saves the figure as PDF and PNG in the working directory and shows it.
    """
    results = []
    fig, axs = plt.subplots(5, 2, sharex=True,figsize=(15, 12), dpi=650)

    # Leave room between subplot rows and below the main title.
    fig.subplots_adjust(hspace=0.9, top=0.9)
    fig.suptitle("Fitted Arima Model for top 10 Infected cases Worldwide", fontsize=18,fontweight="bold")
    axs = axs.flatten()
    for i, country in enumerate(data.index):
        ts = data.loc[country].astype(float)
        # Explicit split position: slicing with ``[:-0]`` would yield an empty
        # training set whenever the hold-out size rounds down to zero.
        split_at = len(ts) - int(len(ts) * train_test_split)
        train, test = ts.iloc[:split_at], ts.iloc[split_at:]
        model = auto_arima(train, seasonal=False, suppress_warnings=True, error_action="ignore", trace=False)
        future_forecast = np.maximum(np.asarray(model.predict(n_periods=n_forecast)), 0)

        # Score and draw only the steps for which both a forecast and an
        # observation exist; the lengths differ unless n_forecast == len(test).
        horizon = min(len(test), len(future_forecast))
        scored_test = test.iloc[:horizon]
        scored_forecast = future_forecast[:horizon]
        rmse = np.sqrt(mean_squared_error(scored_test, scored_forecast)) if horizon else np.nan
        results.append({
            "Country": country,
            "AIC": model.aic(),
            "BIC": model.bic(),
            "Model": model,
            "RMSE": rmse
        })
        axs[i].plot(train.index, train)
        axs[i].plot(test.index, test)
        axs[i].plot(scored_test.index, scored_forecast)
        axs[i].set_title(country)
        # NOTE(review): a date locator only makes sense if the series index
        # holds real datetimes; confirm the column labels are not plain strings.
        axs[i].xaxis.set_major_locator(mdates.AutoDateLocator())

    # One shared legend for the whole figure. The per-axes plt.legend() call
    # was dropped because no artist carries a label for it to show.
    labels = ['Train','Test','Forecast' ]
    fig.legend(labels=labels,
           loc="upper right")

    fig.text(0.5, 0.05, '<---------------------------------------- Date ---------------------------------------->', ha='center',fontsize ='xx-large')
    fig.text(0.075, 0.5, '<------------------------------ Infected cases ------------------------------>', va='center', rotation='vertical',fontsize ='xx-large')
    # NOTE(review): the files are written before tight_layout runs, so the
    # saved figures keep the subplots_adjust spacing; order kept as it was.
    plt.savefig("Fitted Arima Model for top 10 Infected cases Worldwide.pdf", format="pdf")
    plt.savefig("Fitted Arima Model for top 10 Infected cases Worldwide.png", format="png")
    plt.tight_layout()
    plt.show()
    return pd.DataFrame(results)

In [None]:
import pandas as pd
import numpy as np
from pmdarima.arima import auto_arima
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

def fit_and_predict_ARIMA(data, train_test_split=0.2, n_forecast=28):
    """Fit, score and plot auto-ARIMA per row, with a median line and one interval band.

    Each row of ``data`` is one time series (row label = country). The trailing
    ``train_test_split`` fraction is held out, the model is fitted on the rest
    and ``n_forecast`` steps are predicted together with their confidence
    intervals; forecasts and bounds are clipped at zero. Series are drawn on a
    fixed 5 x 2 grid, so at most ten rows can be plotted.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per country; every value must be convertible to float.
    train_test_split : float
        Fraction of each series held out as the test tail.
    n_forecast : int
        Number of steps to predict after the training part.

    Returns
    -------
    pandas.DataFrame
        One row per country with columns Country, AIC, BIC, Model, RMSE,
        Median (median of the forecast), Lower CI and Upper CI (bounds of the
        narrowest forecast interval). RMSE is NaN when there is nothing to
        compare the forecast with.

    Side effects
    ------------
    Saves the figure as PDF and PNG in the working directory and shows it.
    """
    results = []
    fig, axs = plt.subplots(5, 2, sharex=True, figsize=(15, 12), dpi=650)

    # Leave room between subplot rows and below the main title.
    fig.subplots_adjust(hspace=0.9, top=0.9)
    fig.suptitle("Fitted ARIMA Model for Top 10 Infected Cases Worldwide", fontsize=18, fontweight="bold")
    axs = axs.flatten()

    for i, country in enumerate(data.index):
        ts = data.loc[country].astype(float)
        # Explicit split position: slicing with ``[:-0]`` would yield an empty
        # training set whenever the hold-out size rounds down to zero.
        split_at = len(ts) - int(len(ts) * train_test_split)
        train, test = ts.iloc[:split_at], ts.iloc[split_at:]
        model = auto_arima(train, seasonal=False, suppress_warnings=True, error_action="ignore", trace=False)
        future_forecast, conf_int = model.predict(n_periods=n_forecast, return_conf_int=True)
        future_forecast = np.maximum(np.asarray(future_forecast), 0)
        conf_int = np.maximum(conf_int, 0)

        # Median of the forecast, and the single narrowest interval among all steps.
        median = np.median(future_forecast)
        best_conf_int = conf_int[np.argmin(np.abs(conf_int[:, 1] - conf_int[:, 0])), :]

        # Score and draw only the steps for which both a forecast and an
        # observation exist; the lengths differ unless n_forecast == len(test).
        horizon = min(len(test), len(future_forecast))
        scored_test = test.iloc[:horizon]
        scored_forecast = future_forecast[:horizon]
        rmse = np.sqrt(mean_squared_error(scored_test, scored_forecast)) if horizon else np.nan
        results.append({
            "Country": country,
            "AIC": model.aic(),
            "BIC": model.bic(),
            "Model": model,
            "RMSE": rmse,
            "Median": median,
            "Lower CI": best_conf_int[0],
            "Upper CI": best_conf_int[1]
        })

        axs[i].plot(train.index, train)
        axs[i].plot(test.index, test)
        axs[i].plot(scored_test.index, scored_forecast)
        axs[i].axhline(y=median, color='r', linestyle='--', label='Median')
        # Constant band across the test span using the narrowest interval.
        axs[i].fill_between(test.index, best_conf_int[0], best_conf_int[1], alpha=0.3)
        axs[i].set_title(country)
        # NOTE(review): a date locator only makes sense if the series index
        # holds real datetimes; confirm the column labels are not plain strings.
        axs[i].xaxis.set_major_locator(mdates.AutoDateLocator())

    labels = ['Train', 'Test', 'Forecast', 'Median']
    fig.legend(labels=labels, loc="upper right")

    # Adds a legend to the current axes for its labelled median line.
    plt.legend()
    # NOTE(review): the files are written before tight_layout runs, so the
    # saved figures keep the subplots_adjust spacing; order kept as it was.
    plt.savefig("Fitted ARIMA Model for Top 10 Infected Cases Worldwide.pdf", format="pdf")
    plt.savefig("Fitted ARIMA Model for Top 10 Infected Cases Worldwide.png", format="png")
    plt.tight_layout()
    plt.show()

    return pd.DataFrame(results)


In [None]:
# Fit, plot and score one ARIMA model per row of df.
arima_results = fit_and_predict_ARIMA(df)

In [None]:
import pandas as pd
import numpy as np
from pmdarima.arima import auto_arima
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

def fit_and_predict_ARIMA(data, train_test_split=0.2, n_forecast=28):
    """Fit, score and plot auto-ARIMA per row, shading the per-step forecast interval.

    Each row of ``data`` is one time series (row label = country). The trailing
    ``train_test_split`` fraction is held out, the model is fitted on the rest
    and ``n_forecast`` steps are predicted together with their confidence
    intervals; forecasts and bounds are clipped at zero. Series are drawn on a
    fixed 5 x 2 grid, so at most ten rows can be plotted.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per country; every value must be convertible to float.
    train_test_split : float
        Fraction of each series held out as the test tail.
    n_forecast : int
        Number of steps to predict after the training part.

    Returns
    -------
    pandas.DataFrame
        One row per country with columns Country, AIC, BIC, Model, RMSE,
        Lower CI and Upper CI. The two CI cells each hold the full array of
        bounds for all forecast steps. RMSE is NaN when there is nothing to
        compare the forecast with.

    Side effects
    ------------
    Saves the figure as PDF in the working directory and shows it.
    """
    results = []
    fig, axs = plt.subplots(5, 2, sharex=True, figsize=(15, 12), dpi=650)

    # Leave room between subplot rows and below the main title.
    fig.subplots_adjust(hspace=0.9, top=0.9)
    fig.suptitle("Fitted ARIMA Model for Top 10 Infected Cases Worldwide", fontsize=18, fontweight="bold")
    axs = axs.flatten()

    for i, country in enumerate(data.index):
        ts = data.loc[country].astype(float)
        # Explicit split position: slicing with ``[:-0]`` would yield an empty
        # training set whenever the hold-out size rounds down to zero.
        split_at = len(ts) - int(len(ts) * train_test_split)
        train, test = ts.iloc[:split_at], ts.iloc[split_at:]
        model = auto_arima(train, seasonal=False, suppress_warnings=True, error_action="ignore", trace=False)
        future_forecast, conf_int = model.predict(n_periods=n_forecast, return_conf_int=True)
        future_forecast = np.maximum(np.asarray(future_forecast), 0)
        conf_int = np.maximum(conf_int, 0)

        # Score and draw only the steps for which both a forecast and an
        # observation exist; the lengths differ unless n_forecast == len(test).
        horizon = min(len(test), len(future_forecast))
        scored_test = test.iloc[:horizon]
        scored_forecast = future_forecast[:horizon]
        rmse = np.sqrt(mean_squared_error(scored_test, scored_forecast)) if horizon else np.nan
        results.append({
            "Country": country,
            "AIC": model.aic(),
            "BIC": model.bic(),
            "Model": model,
            "RMSE": rmse,
            "Lower CI": conf_int[:, 0],
            "Upper CI": conf_int[:, 1]
        })

        axs[i].plot(train.index, train)
        axs[i].plot(test.index, test)
        axs[i].plot(scored_test.index, scored_forecast)
        axs[i].fill_between(scored_test.index, conf_int[:horizon, 0], conf_int[:horizon, 1], alpha=0.3)
        axs[i].set_title(country)
        # NOTE(review): a date locator only makes sense if the series index
        # holds real datetimes; confirm the column labels are not plain strings.
        axs[i].xaxis.set_major_locator(mdates.AutoDateLocator())

    # One shared legend for the whole figure. The per-axes plt.legend() call
    # was dropped because no artist carries a label for it to show.
    labels = ['Train', 'Test', 'Forecast']
    fig.legend(labels=labels, loc="upper right")

    fig.text(0.5, 0.05, '<---------------------------------------- Date ---------------------------------------->',
             ha='center', fontsize='xx-large')
    fig.text(0.075, 0.5, '<------------------------------ Infected cases ------------------------------>',
             va='center', rotation='vertical', fontsize='xx-large')

    # NOTE(review): the file is written before tight_layout runs, so the
    # saved figure keeps the subplots_adjust spacing; order kept as it was.
    plt.savefig("Fitted ARIMA Model for Top 10 Infected Cases Worldwide.pdf", format="pdf")
    plt.tight_layout()
    plt.show()

    return pd.DataFrame(results)

In [None]:
# Fit, plot and score one ARIMA model per row of df.
arima_results = fit_and_predict_ARIMA(df)

In [None]:
# Display the metrics table.
arima_results

In [None]:
# Write the metrics table to disk without the row index.
arima_results.to_csv('arima_results_top10.csv', index=False)


# Random Forest for top 10

In [None]:
# Reload the daily case counts and keep the first ten rows.
# parse_dates=[0] is not passed: column 0 appears to be the Country column
# (TODO confirm against the CSV), which holds names rather than dates.
df = pd.read_csv('E:/Phd_dataset/MonkeyPox/Daily_Country_Wise_Confirmed_Cases.csv', header=0)
df = df.iloc[:10]
df

In [None]:
def reshape_data(df, country_col='Country'):
    """Turn a wide table (one column per date) into long format.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table holding ``country_col`` plus one column per date.
    country_col : str
        Name of the identifier column that is kept as is.

    Returns
    -------
    pandas.DataFrame
        Columns ``country_col``, ``Date`` (former column labels parsed with
        ``pd.to_datetime``) and ``Confirmed`` (the cell values).
    """
    melted = df.melt(id_vars=[country_col], var_name='Date', value_name='Confirmed')
    return melted.assign(Date=pd.to_datetime(melted['Date']))
# Long format, reduced to the two columns the model code reads.
df = reshape_data(df)[['Country', 'Confirmed']]
df

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

def random_forest(dataframe, column_name, test_size, forecast_periods, rows, columns):
    """Fit a random forest on the time index per country, score and plot it.

    For every distinct value of ``dataframe["Country"]`` the last ``test_size``
    observations of ``column_name`` are held out. A forest is fitted on the
    position index 0..n_train-1 and used to predict the held-out positions;
    negative predictions are clipped to zero.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Long-format data with a ``Country`` column and ``column_name``.
    column_name : str
        Column holding the values to model.
    test_size : int
        Number of trailing observations held out per country.
    forecast_periods : int
        Accepted for interface compatibility; not used by this function.
    rows, columns : int
        Subplot grid shape; must provide at least one axes per country.

    Returns
    -------
    tuple of dict
        ``(processed_data, models, forecast_values)``, each keyed by country.

    Side effects
    ------------
    Saves the figure as PDF, prints the RMSE per country and writes it to
    ``RMSE_scores.csv`` in the working directory.
    """
    processed_data = {}
    models = {}
    forecast_values = {}
    rmse = {}
    country_list = dataframe["Country"].unique()
    # squeeze=False keeps ``axs`` two-dimensional even for a single row or
    # column, so the [row, col] indexing below always works.
    fig, axs = plt.subplots(rows, columns, sharex=True, figsize=(15, 12), dpi=650, squeeze=False)

    # Leave room between subplot rows and below the main title.
    fig.subplots_adjust(hspace=0.9, top=0.9)
    fig.suptitle("Fitted Random Forest Model for Monkey cases Worlwide", fontsize=18,fontweight="bold")

    for i, country in enumerate(country_list):
        # Extract the data for the country
        country_data = dataframe[dataframe["Country"] == country]
        processed_data[country] = country_data

        # Split the data into train and test sets
        train_data = country_data[column_name][:-test_size]
        test_data = country_data[column_name][-test_size:]

        # Fit the forest with the observation position as the only feature
        model = RandomForestRegressor(n_estimators=113, random_state=42)
        X_train = np.array(range(0, len(train_data))).reshape(-1, 1)
        y_train = train_data.values.ravel()
        model.fit(X_train, y_train)
        models[country] = model

        # Predict the held-out positions
        X_forecast = np.array(range(len(train_data), len(train_data) + len(test_data))).reshape(-1, 1)
        forecast = model.predict(X_forecast)
        forecast = np.maximum(forecast, 0)
        forecast_values[country] = forecast

        # Evaluate the model using RMSE
        rmse[country] = np.sqrt(mean_squared_error(test_data, forecast))

        # Plot train, held-out and predicted values on this country's axes
        ax = axs[i // columns, i % columns]
        ax.plot(train_data.index, train_data)
        ax.plot(test_data.index, test_data, color ='orange' )
        ax.plot(test_data.index, forecast, color = 'green')
        ax.set_title(country)

    # One shared legend for the whole figure. The per-axes plt.legend() call
    # was dropped because no artist carries a label for it to show.
    labels = ['Train', 'Actual','Forecast']
    fig.legend(labels=labels,
           loc="upper right")

    fig.text(0.5, 0.08, '<---------------------------------------- Index ---------------------------------------->', ha='center',fontsize ='xx-large')
    fig.text(0.05, 0.55, '<------------------------------ Infected cases ------------------------------>', va='center', rotation='vertical',fontsize ='xx-large')

    # NOTE(review): the file is written before tight_layout runs, so the
    # saved figure keeps the subplots_adjust spacing; order kept as it was.
    plt.savefig("Fitted Random Forest Model for MOnkeypox cases Worldwide.pdf", format="pdf")

    plt.tight_layout()
    plt.show()
    print("RMSE: ", rmse)
    rmse_df = pd.DataFrame(rmse.items(), columns=['Country', 'RMSE'])
    rmse_df.to_csv('RMSE_scores.csv', index=False)

    return processed_data, models, forecast_values


### Cross-validation for random forest

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_predict

def random_forest(dataframe, column_name, k_folds, forecast_periods, rows, columns):
    """Fit a random forest on the time index per country with k-fold scoring.

    For every distinct value of ``dataframe["Country"]`` a forest is fitted on
    the position index 0..n-1 of ``column_name``. Out-of-fold predictions from
    ``cross_val_predict`` give the RMSE, and the fitted model predicts the next
    ``forecast_periods`` positions. Negative predictions are clipped to zero.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Long-format data with a ``Country`` column and ``column_name``.
    column_name : str
        Column holding the values to model.
    k_folds : int
        Passed to ``cross_val_predict`` as ``cv``.
    forecast_periods : int
        Number of positions to predict beyond the observed data.
    rows, columns : int
        Subplot grid shape; must provide at least one axes per country.

    Returns
    -------
    tuple of dict
        ``(processed_data, models, forecast_values)``, each keyed by country.

    Side effects
    ------------
    Saves the figure as PDF, prints the RMSE per country and writes it to
    ``RMSE_scores.csv`` in the working directory.
    """
    processed_data = {}
    models = {}
    forecast_values = {}
    rmse = {}
    country_list = dataframe["Country"].unique()
    # squeeze=False keeps ``axs`` two-dimensional even for a single row or
    # column, so the [row, col] indexing below always works.
    fig, axs = plt.subplots(rows, columns, sharex=True, figsize=(15, 12), dpi=650, squeeze=False)

    # Leave room between subplot rows and below the main title.
    fig.subplots_adjust(hspace=0.9, top=0.9)
    fig.suptitle("Fitted Random Forest Model for Monkey cases Worldwide", fontsize=18, fontweight="bold")

    for i, country in enumerate(country_list):
        # Extract the data for the country
        country_data = dataframe[dataframe["Country"] == country]
        processed_data[country] = country_data

        # Feature = observation position, target = observed values
        X = np.array(range(len(country_data))).reshape(-1, 1)
        y = country_data[column_name].values

        # Fit the model on all observations; this instance makes the forecast
        model = RandomForestRegressor(n_estimators=113, random_state=42)
        model.fit(X, y)
        models[country] = model

        # Out-of-fold predictions, used only for the RMSE below
        forecast = cross_val_predict(model, X, y, cv=k_folds)
        forecast = np.maximum(forecast, 0)

        # Predict the positions that follow the observed data
        X_forecast = np.array(range(len(X), len(X) + forecast_periods)).reshape(-1, 1)
        future_forecast = model.predict(X_forecast)
        future_forecast = np.maximum(future_forecast, 0)
        forecast_values[country] = future_forecast

        # Evaluate the model using RMSE
        rmse[country] = np.sqrt(mean_squared_error(y, forecast))

        # Plot the observed values followed by the forecast
        ax = axs[i // columns, i % columns]
        ax.plot(X, y)
        ax.plot(X_forecast, future_forecast, color='green')
        ax.set_title(country)

    # One shared legend for the whole figure. The per-axes plt.legend() call
    # was dropped because no artist carries a label for it to show.
    labels = ['Actual', 'Forecast']
    fig.legend(labels=labels, loc="upper right")

    fig.text(0.5, 0.08, '<---------------------------------------- Index ---------------------------------------->',
             ha='center', fontsize='xx-large')
    fig.text(0.05, 0.55, '<------------------------------ Infected cases ------------------------------>',
             va='center', rotation='vertical', fontsize='xx-large')

    # NOTE(review): the file is written before tight_layout runs, so the
    # saved figure keeps the subplots_adjust spacing; order kept as it was.
    plt.savefig("Fitted Random Forest Model for Monkeypox cases Worldwide.pdf", format="pdf")

    plt.tight_layout()
    plt.show()
    print("RMSE: ", rmse)
    rmse_df = pd.DataFrame(rmse.items(), columns=['Country', 'RMSE'])
    rmse_df.to_csv('RMSE_scores.csv', index=False)

    return processed_data, models, forecast_values


In [None]:
# Number of cross-validation folds passed to random_forest.
k_folds = 5  # Choose the number of k-folds
# Fit and plot per country on a 5 x 2 grid, forecasting 28 positions ahead.
processed_data, models, forecast_values = random_forest(df, 'Confirmed', k_folds, forecast_periods=28, rows=5, columns=2)


In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

def random_forest(dataframe, column_name, test_size, forecast_periods, rows, columns):
    """Fit a random forest on the time index per country; plot forecast, band and median.

    For every distinct value of ``dataframe["Country"]`` the last ``test_size``
    observations of ``column_name`` are held out. A forest is fitted on the
    position index 0..n_train-1 and used to predict the held-out positions;
    negative predictions are clipped to zero. The shaded band is the forecast
    +/- 1.96 times the standard deviation of the forecast values themselves.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Long-format data with a ``Country`` column and ``column_name``.
    column_name : str
        Column holding the values to model.
    test_size : int
        Number of trailing observations held out per country.
    forecast_periods : int
        Accepted for interface compatibility; not used by this function.
    rows, columns : int
        Subplot grid shape; must provide at least one axes per country.

    Returns
    -------
    tuple of dict
        ``(processed_data, models, forecast_values)``, each keyed by country.

    Side effects
    ------------
    Saves the figure as PDF and PNG, prints the RMSE per country and writes
    Country, RMSE and Median to ``RMSE_scores.csv`` in the working directory.
    """
    processed_data = {}
    models = {}
    forecast_values = {}
    confidence_intervals = {}
    median_values = {}
    rmse = {}
    legend_handles = []
    country_list = dataframe["Country"].unique()
    # squeeze=False keeps ``axs`` two-dimensional even for a single row or
    # column, so the [row, col] indexing below always works.
    fig, axs = plt.subplots(rows, columns, sharex=True, figsize=(15, 12), dpi=650, squeeze=False)

    # Leave room between subplot rows and below the main title.
    fig.subplots_adjust(hspace=0.9, top=0.9)
    fig.suptitle("Fitted Random Forest Model for Monkey Cases Worldwide", fontsize=18, fontweight="bold")

    for i, country in enumerate(country_list):
        # Extract the data for the country
        country_data = dataframe[dataframe["Country"] == country]
        processed_data[country] = country_data

        # Split the data into train and test sets
        train_data = country_data[column_name][:-test_size]
        test_data = country_data[column_name][-test_size:]

        # Fit the forest with the observation position as the only feature
        model = RandomForestRegressor(n_estimators=113, random_state=42)
        X_train = np.array(range(0, len(train_data))).reshape(-1, 1)
        y_train = train_data.values.ravel()
        model.fit(X_train, y_train)
        models[country] = model

        # Predict the held-out positions
        X_forecast = np.array(range(len(train_data), len(train_data) + len(test_data))).reshape(-1, 1)
        forecast = model.predict(X_forecast)
        forecast = np.maximum(forecast, 0)
        forecast_values[country] = forecast

        # Band of +/- 1.96 standard deviations of the forecast, floored at zero
        spread = 1.96 * np.std(forecast)
        lower_ci = np.maximum(forecast - spread, 0)
        upper_ci = forecast + spread
        confidence_intervals[country] = (lower_ci, upper_ci)
        median_values[country] = np.median(forecast)

        # Evaluate the model using RMSE
        rmse[country] = np.sqrt(mean_squared_error(test_data, forecast))

        # Plot train, held-out and predicted values with band and median line
        ax = axs[i // columns, i % columns]
        train_line, = ax.plot(train_data.index, train_data)
        actual_line, = ax.plot(test_data.index, test_data, color='orange')
        forecast_line, = ax.plot(test_data.index, forecast, color='green')
        ci_band = ax.fill_between(test_data.index, lower_ci, upper_ci, alpha=0.3)
        median_line = ax.axhline(median_values[country], color='gray', linestyle='--')
        ax.set_title(country)
        ax.set_xlabel("Index")
        if i == 0:
            # Keep the first subplot's artists in the same order as the labels,
            # so each legend entry is paired with the right artist.
            legend_handles = [train_line, actual_line, forecast_line, ci_band, median_line]

    labels = ['Train', 'Actual', 'Forecast', 'Confidence Interval', 'Median']
    if legend_handles:
        fig.legend(handles=legend_handles, labels=labels, loc="upper right")

    # NOTE(review): the files are written before tight_layout runs, so the
    # saved figures keep the subplots_adjust spacing; order kept as it was.
    plt.savefig("Fitted Random Forest Model for MOnkeypox cases Worldwide.pdf", format="pdf")
    plt.savefig("Fitted Random Forest Model for MOnkeypox cases Worldwide.png", format="png")

    plt.tight_layout()
    plt.show()
    print("RMSE: ", rmse)
    # Build the table column by column: passing median_values as the second
    # positional argument of DataFrame would be taken as the row index.
    rmse_df = pd.DataFrame({
        'Country': list(rmse),
        'RMSE': list(rmse.values()),
        'Median': [median_values[country] for country in rmse],
    })
    rmse_df.to_csv('RMSE_scores.csv', index=False)

    return processed_data, models, forecast_values


In [None]:
# Fit and plot per country on a 5 x 2 grid, holding out the last 28 observations.
processed_data, models, forecast_values = random_forest(df, "Confirmed", 
                                                        test_size=28, forecast_periods=28, rows=5, columns=2)

# Stationarity

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller
import math

In [None]:
def check_stationarity(df):
    """Run the ADF test per country and plot ACF/PACF side by side.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format data with ``Country`` and ``Confirmed`` columns.

    Returns
    -------
    pandas.DataFrame
        One row per country with columns ``Country``, ``ADF Test Statistic``
        and ``p-value``.

    Side effects
    ------------
    Prints a stationary / non-stationary verdict per country (p-value below
    0.05 counts as stationary), saves the figure as PDF and PNG in the
    working directory and shows it.
    """
    result_columns = ['Country', 'ADF Test Statistic', 'p-value']
    # Rows are collected in a list and turned into a frame once at the end;
    # DataFrame.append no longer exists in pandas 2.x.
    rows = []
    # Group the data by country
    country_groups = df.groupby('Country')
    number_of_countries = df['Country'].nunique()
    # One row of axes per country (ACF on the left, PACF on the right).
    # max(..., 1) keeps subplots valid when there is no country at all.
    fig, axes = plt.subplots(nrows=max(number_of_countries, 1), ncols=2, sharex=True,
                             figsize=(18, 15), dpi=650, squeeze=False)
    axes = axes.flatten()
    # Leave room between subplot rows and below the top edge.
    fig.subplots_adjust(hspace=0.9, top=0.9)
    #fig.suptitle("ACF and PACF plots for MonkeyPox cases of Top 10 Countries", fontsize=18,fontweight="bold")

    for i, (country, group) in enumerate(country_groups):
        # Extract the cases for the country
        cases = group['Confirmed']

        # ADF test: element 0 is the test statistic, element 1 the p-value
        adf_test = adfuller(cases)
        adf_stat = adf_test[0]
        p_value = adf_test[1]

        rows.append({'Country': country, 'ADF Test Statistic': adf_stat, 'p-value': p_value})

        # check for stationarity
        if p_value < 0.05:
            print(f'{country} data is stationary')
        else:
            print(f'{country} data is non-stationary')

        # ACF and PACF share one row: even slot = ACF, odd slot = PACF
        acf_ax = axes[2 * i]
        pacf_ax = axes[2 * i + 1]
        plot_acf(cases, lags=35, alpha=0.05, ax=acf_ax)
        acf_ax.set_title(f'ACF for {country}',fontsize = 14)

        plot_pacf(cases, lags=35, alpha=0.05, ax=pacf_ax)
        pacf_ax.set_title(f'PACF for {country}',fontsize = 14)

    fig.text(0.5, 0.06, '<---------------------------------------- Lag ---------------------------------------->', ha='center',fontsize ='xx-large')
    fig.text(0.08, 0.5, '<------------------------------ Correlation ------------------------------>', va='center', rotation='vertical',fontsize ='xx-large')

    # NOTE(review): the files are written before tight_layout runs, so the
    # saved figures keep the subplots_adjust spacing; order kept as it was.
    plt.savefig("ACF and PACF plots for MonkeyPox cases of Top 10 Countries.pdf", format="pdf")
    plt.savefig("ACF and PACF plots for MonkeyPox cases of Top 10 Countries.png", format="png")
    plt.tight_layout()
    plt.show()
    return pd.DataFrame(rows, columns=result_columns)

In [None]:
# Run the ADF test and the ACF/PACF plots for every country in df, then print the result table.
results = check_stationarity(df)
print(results)


# New distribution fitting

In [None]:
import pandas as pd
import scipy.stats as st

# Reload the daily case counts and keep the first ten rows.
# parse_dates=[0] is not passed: column 0 appears to be the Country column
# (TODO confirm against the CSV), which holds names rather than dates.
df = pd.read_csv('E:/Phd_dataset/MonkeyPox/Daily_Country_Wise_Confirmed_Cases.csv', header=0)
df = df.iloc[:10]
df

In [None]:
def reshape_data(df, country_col='Country'):
    """Turn a wide table (one column per date) into long format.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table holding ``country_col`` plus one column per date.
    country_col : str
        Name of the identifier column that is kept as is.

    Returns
    -------
    pandas.DataFrame
        Columns ``country_col``, ``Date`` (former column labels parsed with
        ``pd.to_datetime``) and ``Confirmed`` (the cell values).
    """
    melted = df.melt(id_vars=[country_col], var_name='Date', value_name='Confirmed')
    return melted.assign(Date=pd.to_datetime(melted['Date']))

# Long format with a fixed column order, sorted by country name.
df = reshape_data(df)[['Date', 'Country', 'Confirmed']].sort_values(by='Country')
df

In [None]:
# Order rows by country, then by date within each country.
df =  df.sort_values(by = ['Country','Date'])
df

In [None]:
# Keep only the rows whose Confirmed value is not 0.
df = df[df['Confirmed'] != 0]
df

In [None]:
# Write the filtered long-format table to disk (the row index is included in the file).
df.to_csv('New Format Data Monkeypox.csv')

In [None]:
from distfit import distfit
import pandas as pd
import matplotlib.pyplot as plt


# Distinct country names, in order of first appearance in df
Country = df['Country'].unique()

# One distfit object, created with distr='full', is reused for every country
dfit = distfit(distr='full')

# Fixed 5 x 2 grid: room for at most ten countries
fig, axs = plt.subplots(nrows=5, ncols=2, figsize=(15, 18), dpi = 650)
#fig.suptitle('Best fit Distribution of Monkeypox Confirmed Cases for top 10 infected Countries', fontsize=18, fontweight='bold')

# Loop over all countries
for i, code in enumerate(Country):
    # Rows of the current country only
    state_data = df.loc[df['Country']==code, ['Country', 'Confirmed']]
    
    # Fit the candidate distributions to this country's confirmed counts
    dfit.fit_transform(state_data['Confirmed'])
    
    # Draw the fit on this country's axes (row i//2, column i%2)
    ax = axs[i//2, i%2]
    dfit.plot(title=code, ax=ax)
    ax.title.set_size(14)
    ax.title.set_fontweight('bold')
    
# Set spacing between subplots, then save as PDF and PNG before showing
plt.subplots_adjust(hspace=0.9, wspace=0.4)
plt.savefig('Best fit distribution of top 10 Countries.pdf', format = 'pdf')
plt.savefig('Best fit distribution of top 10 Countries.png', format = 'png')

plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Group the data by country and calculate the mean and standard deviation of Confirmed
grouped = df.groupby('Country')['Confirmed'].agg(['mean', 'std'])

# Bar chart of the mean per country, with the standard deviation as error bars
fig, ax = plt.subplots()
ax.bar(grouped.index, grouped['mean'], yerr=grouped['std'], align='center', alpha=0.5)
ax.set_ylabel('Mean Confirmed Cases')
ax.set_xticks(grouped.index)
# Rotate the country labels so they stay readable
ax.set_xticklabels(grouped.index, rotation=45, ha='right')
ax.set_title('Mean and Standard Deviation of Confirmed Cases by Country')
ax.set_xlabel('Country')


plt.show()
# Summary: groupby computes the mean and standard deviation of the Confirmed column per country. The bar chart uses the country names as x values, the means as bar heights and the standard deviations as error bars. The x-tick labels are the country names, rotated by 45 degrees for readability.



In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def mean_sd_days(df):
    """Summarise confirmed cases per country.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format data with ``Country`` and ``Confirmed`` columns.

    Returns
    -------
    pandas.DataFrame
        One row per country, sorted by country, with columns ``Country``,
        ``mean`` and ``std`` (of ``Confirmed``) and ``days`` (the number of
        rows the country has in ``df``).
    """
    by_country = df.groupby('Country')
    state_stats = by_country['Confirmed'].agg(['mean', 'std']).reset_index()
    # size() comes from the same grouping, so its order matches the rows
    # above; it counts rows per country, including rows with missing Confirmed.
    state_stats['days'] = by_country.size().to_numpy()
    return state_stats


In [None]:
# Display the per-country mean, standard deviation and row count.
mean_sd_days(df)

In [None]:
# Write the per-country summary to disk (the row index is included in the file).
mean_sd_days(df).to_csv('Mean_Sd_Days.csv')