In [1]:
import pandas as pd
import numpy as np
import darts
import plotly as plt
from datetime import datetime, timedelta
import glob

# Data loading

In [2]:
# Load the raw Excel workbook into `df` and preview its first rows.
raw_workbook = '../RawData/Book1.xlsx'
df = pd.read_excel(raw_workbook)
df.head()

Unnamed: 0,Country,Fiscal Year,Fiscal Week,Global DGV NS Visits
0,Australia,2024-FY,2024-W01,79932
1,Australia,2024-FY,2024-W02,78220
2,Australia,2024-FY,2024-W03,73804
3,Australia,2024-FY,2024-W04,73641
4,Australia,2024-FY,2024-W05,70572


# Preprocessing Data

In [3]:
def get_year(year):
    """Return the integer that precedes the first '-' in a label such as '2024-W01'."""
    year_text, _, _ = year.partition('-')
    return int(year_text)
def get_week(week):
    """Return the week number from a label such as '2024-W01' as an int.

    The label is split on '-', the second piece is taken, its first
    character (the 'W' prefix in the example) is dropped, and the rest is
    converted to int.
    """
    pieces = week.split('-')
    week_token = pieces[1]
    digits = week_token[1:]
    return int(digits)
def convert_to_datetime(row):
    """Return the Monday of the week described by ``row`` as a ``datetime``.

    Parameters
    ----------
    row : mapping
        Anything indexable by ``'year'`` and ``'week_number'`` (a dict or a
        DataFrame row). ``week_number`` is interpreted as a ``%U`` week
        (weeks start on Sunday; days before the first Sunday are week 0).

    Returns
    -------
    datetime.datetime
        Midnight on the Monday (``%w`` = 1) of that week.

    Raises
    ------
    ValueError
        If the year/week pair cannot be parsed by ``strptime``.
    """
    year = row['year']
    week_number = row['week_number']
    # strptime only honours %U when a weekday is also supplied; without one
    # the week number is ignored and every row parses to 1 January. Anchor on
    # Monday so the result matches the '%Y-%U-%w' conversion used elsewhere
    # in this notebook.
    return datetime.strptime(f'{year}-{week_number}-1', '%Y-%U-%w')

    
# Build one processed CSV per country from the raw frame `df`.
for name, df_grouped in df.groupby('Country'):
    print('Proprocessing the dataset of ',name)

    # Work on an explicit copy: adding columns directly to the group object
    # yielded by groupby can trigger SettingWithCopyWarning and leaves it
    # unclear whether the parent frame is affected.
    index_grouped = df_grouped.copy()

    # Split the "Fiscal Week" label (e.g. "2024-W01") into numeric parts.
    index_grouped['year'] = index_grouped['Fiscal Week'].apply(get_year)
    index_grouped['week_number'] = index_grouped['Fiscal Week'].apply(get_week)

    # Date of the Monday (%w = 1) of each %U week of the given year.
    index_grouped['date'] = pd.to_datetime(
        index_grouped['year'].astype(str)
        + '-'
        + index_grouped['week_number'].astype(str)
        + '-1',
        format='%Y-%U-%w',
    )

    # Order chronologically, renumber the rows, and write the country's file.
    sorted_df = (
        index_grouped
        .sort_values(by=['year', 'week_number'])
        .reset_index(drop=True)
    )
    sorted_df.to_csv(f'../ProcessedData/{name}.csv', index=False)


Proprocessing the dataset of  Australia
Proprocessing the dataset of  Brazil
Proprocessing the dataset of  Canada
Proprocessing the dataset of  China
Proprocessing the dataset of  India
Proprocessing the dataset of  Japan
Proprocessing the dataset of  Malaysia
Proprocessing the dataset of  Netherlands
Proprocessing the dataset of  New Zealand
Proprocessing the dataset of  Singapore
Proprocessing the dataset of  Spain
Proprocessing the dataset of  Sweden
Proprocessing the dataset of  Switzerland
Proprocessing the dataset of  United Kingdom
Proprocessing the dataset of  United States


# Visualization 

In [10]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def plot_scatter(df):
    """Write an interactive markers+lines chart of weekly visits to an HTML file.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``Country``, ``year``, ``week_number`` and
        ``Global DGV NS Visits``. The first unique ``Country`` value is used
        for the chart title and the output file name.

    Returns
    -------
    None
        The figure is written to ``../plots/<country>.html``.

    Raises
    ------
    KeyError
        If a required column is missing.
    IndexError
        If ``df`` has no rows (there is no country to read).

    Notes
    -----
    The input frame is left untouched: the x-axis dates are computed into a
    local Series instead of being written to ``df['date']``.
    """
    county_name = df['Country'].unique()[0]

    # Monday (%w = 1) of each %U week, kept local so plotting has no side
    # effect on the caller's DataFrame.
    dates = pd.to_datetime(
        df['year'].astype(str) + '-' + df['week_number'].astype(str) + '-1',
        format='%Y-%U-%w',
    )

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=dates,
            y=df['Global DGV NS Visits'],
            mode='markers+lines',
            name='Global DGV NS Visits',
        )
    )
    fig.update_layout(
        title=f'Visualization of Global DGV NS Visits vs {county_name}',
        xaxis_title='Date',
        yaxis_title='Global DGV NS Visits',
        height=600,
        width=1800,
        font_size=14
    )
    fig.write_html(f'../plots/{county_name}.html')


In [11]:
# glob returns matches in arbitrary order; sort so every run processes the
# files in the same sequence.
path_list = sorted(glob.glob('../ProcessedData/*.csv'))

for path in path_list:
    # Use a dedicated name instead of `df`: rebinding `df` here would replace
    # the raw frame loaded at the top of the notebook, so re-running the
    # preprocessing cell afterwards would silently process a single country.
    country_df = pd.read_csv(path)
    # Write the HTML chart for this country.
    plot_scatter(country_df)

In [9]:
# meta_information_country = {}
# for path in glob.glob('../plots/*.html'):
#     country = path.split('/')[-1].split('.')[0]
#     meta_information_country[country] =path
    
# meta_information_country