In [47]:
import pandas as pd
import numpy as np
from tabulate import tabulate 

<i>Question 5</i>

For every country and continent, find the lean and peak quarters of outgoing tourists.<br>

In [48]:
# Survey years handled by this section, kept as strings because they are
# interpolated into file paths and used as dictionary keys further down.
years = "2014 2016 2017 2019 2020 2021 2022".split()

# Region header labels; rows of the 'Country of Nationality' column that match
# one of these are treated as the start of a new region section.
regions = [
    'North America',
    'Central & South America',
    'Western Europe',
    'Eastern Europe',
    'Africa',
    'West Asia',
    'South Asia',
    'South East Asia',
    'East Asia',
    'Australasia',
]

In [49]:
def find_data(dict_name, year, type, name):
    """Look up one country's or region's figures for a given year.

    Args:
        dict_name: Nested mapping ``{year: {type: [row, ...]}}`` where each row
            is a list whose first item is the label, second item the total and
            the remaining items the per-quarter values.
        year: Key of the year to search, as a string (e.g. ``'2016'``).
        type: ``'countries'`` or ``'regions'``. The name shadows the builtin
            but is kept so existing keyword callers keep working.
        name: Label to search for; matching is case-insensitive.

    Returns:
        ``(total, q1, q2, ...)`` with ``total`` converted to ``int``, taken from
        the first matching row, or ``None`` when no row matches. The stored row
        is not modified.
    """
    wanted = name.lower()
    for row in dict_name[year][type]:
        label = row[0]
        # Blank spreadsheet cells arrive as NaN (a float); skip them instead of
        # failing on ``.lower()``.
        if not isinstance(label, str):
            continue
        if label.lower() == wanted:
            return (int(row[1]), *row[2:])
    return None

In [50]:
data = {}   # 'dfYY'   -> DataFrame read from that year's workbook
cubes = {}  # 'cubeYY' -> list of per-region 2D lists (region summary row first)

# Number of decimals kept once the percentages are turned into fractions.
# Rounding to 2 decimals collapsed shares to whole percent and created ties that
# np.argmax / np.argmin later resolve by position, which can misreport the
# peak / lean quarter; 4 decimals keeps the spreadsheet's precision.
SHARE_DECIMALS = 4

for y in years:
    # Forward slashes resolve on Windows as well as on POSIX systems, whereas
    # the backslash form only worked on Windows.
    data[f'df{y[-2:]}'] = pd.read_excel(
        f"data/TourismData-{y}/QUARTERWISE PERCENTAGE DISTRIBUTION OF DESTINATION COUNTRY WISE DEPARTURES OF INDIAN NATIONALS.xlsx"
    )  ## to store pandas dataframe

for y in years:
    df = data[f'df{y[-2:]}']
    # Row positions of the region header rows; each one opens a new section.
    region_indices = df[df['Country of Nationality'].isin(regions)].index.tolist()
    split_data = []  ## empty list to hold the region separated 2D lists to be created

    # Iterate over the indices, splitting the DataFrame into segments
    for i in range(len(region_indices)):
        # A section runs from its header up to the next header (or end of sheet)
        start_idx = region_indices[i]
        end_idx = region_indices[i + 1] if i + 1 < len(region_indices) else len(df)
        # Extract the section and convert it to a list of lists (2D list)
        section = df.iloc[start_idx:end_idx].values.tolist()
        split_data.append(section)
    # Dataframe split into 2D lists based on region
    # Format of 2D lists created : [['North America', nan, nan, nan, nan, nan], ['Canada', 289259, 21.30, 18, 20.60, 40.10], ['USA', 1403399, 15.10, 27, 22.10, 35.80]]
    # 1st row contains the name of the region with empty spaces for sum of features, followed by countries in that region with their features.

    # Calculate region-wise sum for quarter-wise and total tourists
    for i in range(len(split_data)):
        chunk = split_data[i]
        size = len(chunk[0])
        total = 0
        grp = (size - 2) * [0]
        for j in range(1, len(chunk)):
            total += chunk[j][1]
            for k in range(size - 2):
                # Weight each country's quarterly percentage by its departures.
                grp[k] += chunk[j][2 + k] * chunk[j][1]
        chunk[0][1] = int(total)

        for k in range(size - 2):
            # A region header with no country rows (or zero departures) has no
            # defined distribution; store 0.0 instead of dividing by zero.
            chunk[0][2 + k] = grp[k] / total if total else 0.0

    # Convert every quarterly percentage (region rows included) to a fraction.
    for i in range(len(split_data)):
        for j in range(len(split_data[i])):
            for k in range(2, len(split_data[i][j])):
                split_data[i][j][k] = round(split_data[i][j][k] / 100, SHARE_DECIMALS)

    cubes[f'cube{y[-2:]}'] = split_data

In [51]:
# Per-year summary: country rows, region rows and one departures-weighted
# 'Overall' row, keyed by the year string.
answer = {}
for y in years:
    year_cube = cubes[f'cube{y[-2:]}']

    # First row of every section is the region summary; the rest are countries.
    region_rows = [section[0] for section in year_cube]
    country_rows = [
        entry
        for section in year_cube
        for entry in section[1:]
        if entry[0] != 'Others'  # the catch-all bucket is not a country
    ]

    # Each row is [label, total, share_1, share_2, ...].
    n_quarters = len(year_cube[0][0]) - 2
    total_overall = sum(region[1] for region in region_rows)
    weighted = [
        sum(region[q + 2] * region[1] for region in region_rows)
        for q in range(n_quarters)
    ]

    ans = {
        'countries': country_rows,
        'regions': region_rows,
        'overall': ['Overall', total_overall]
        + [round(value / total_overall, 2) for value in weighted],
    }
    answer[str(y)] = ans

In [52]:
# Alphabetical list of every country label that appears in any year.
# Built without rebinding `data`, so the dict of raw DataFrames loaded earlier
# stays available after this cell runs.
countries = sorted({
    country_row[0]
    for y in years
    for country_row in answer[f'{y}']['countries']
})

In [53]:
# Column-oriented tables: the first column holds the labels, then one column
# per year whose cells are the tuples returned by find_data (or None).
table_c = {'Country': countries}
table_r = {'Continents': regions}
table_o = {' ': ['Overall']}

# 2014 is left out of these tables (reason not recorded here; TODO confirm).
for y in (yr for yr in years if yr != '2014'):
    key = f'{y}'
    table_c[key] = [find_data(answer, key, 'countries', c) for c in countries]
    table_r[key] = [find_data(answer, key, 'regions', r) for r in regions]
    table_o[key] = [tuple(answer[key]['overall'][1:])]

df_o, df_r, df_c = (pd.DataFrame(tbl) for tbl in (table_o, table_r, table_c))

In [54]:
# Peak (max share) and lean (min share) quarter per region, for every year
# column of df_r plus an 'Overall' entry.
max_quarters = {}
min_quarters = {}

# NOTE(review): this rebinds the module-level `years` to the year columns of
# df_r (an Index without 2014). Kept as-is because later cells may rely on it,
# but earlier cells re-run after this one will see the shortened list.
years = df_r.columns[1:]

for _, row in df_r.iterrows():
    region = row['Continents']
    max_quarters[region] = {}
    min_quarters[region] = {}

    yearly_shares = []  # quarterly shares of the years that have data
    for year in years:
        entry = row[year]
        if not entry:
            # find_data returned None: region label missing for that year.
            max_quarters[region][year] = None
            min_quarters[region][year] = None
            continue

        shares = entry[1:]  # drop the leading total, keep the quarter shares
        yearly_shares.append(shares)
        # argmax/argmin are 0-based; quarters are reported 1-based.
        max_quarters[region][year] = f"Quarter {np.argmax(shares) + 1}"
        min_quarters[region][year] = f"Quarter {np.argmin(shares) + 1}"

    if yearly_shares:
        # 'Overall' is the unweighted mean of the per-year shares.
        avg_shares = np.mean(yearly_shares, axis=0)
        max_quarters[region]['Overall'] = f"Quarter {np.argmax(avg_shares) + 1}"
        min_quarters[region]['Overall'] = f"Quarter {np.argmin(avg_shares) + 1}"
    else:
        max_quarters[region]['Overall'] = None
        min_quarters[region]['Overall'] = None

# One row per region, one column per year plus 'Overall'.
max_quarters_df = pd.DataFrame(max_quarters).T
min_quarters_df = pd.DataFrame(min_quarters).T



In [55]:
# Region-level peak quarters rendered as an ASCII grid, followed by blank lines.
print(
    "Table 1: Table showing the quarter with maximum tourists going to each region for each year and overall:",
    tabulate(max_quarters_df, headers='keys', tablefmt='grid'),
    "\n",
    sep="\n",
)

Table 1: Table showing the quarter with maximum tourists going to each region for each year and overall:
+-------------------------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+
|                         | 2016      | 2017      | 2019      | 2020      | 2021      | 2022      | Overall   |
| North America           | Quarter 4 | Quarter 4 | Quarter 2 | Quarter 1 | Quarter 1 | Quarter 2 | Quarter 1 |
+-------------------------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+
| Central & South America | Quarter 1 | Quarter 1 | Quarter 1 | Quarter 1 | Quarter 3 | Quarter 3 | Quarter 1 |
+-------------------------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+
| Western Europe          | Quarter 1 | Quarter 1 | Quarter 2 | Quarter 1 | Quarter 3 | Quarter 3 | Quarter 1 |
+-------------------------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+

In [56]:
# Region-level lean quarters rendered as an ASCII grid.
print(
    "\nTable 2: Table showing the quarter with minimum tourists going to each region for each year and overall:",
    tabulate(min_quarters_df, headers='keys', tablefmt='grid'),
    sep="\n",
)


Table 2: Table showing the quarter with minimum tourists going to each region for each year and overall:
+-------------------------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+
|                         | 2016      | 2017      | 2019      | 2020      | 2021      | 2022      | Overall   |
| North America           | Quarter 2 | Quarter 2 | Quarter 4 | Quarter 2 | Quarter 2 | Quarter 1 | Quarter2  |
+-------------------------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+
| Central & South America | Quarter 2 | Quarter 2 | Quarter 4 | Quarter 2 | Quarter 1 | Quarter 1 | Quarter2  |
+-------------------------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+
| Western Europe          | Quarter 2 | Quarter 2 | Quarter 4 | Quarter 2 | Quarter 2 | Quarter 1 | Quarter2  |
+-------------------------+-----------+-----------+-----------+-----------+-----------+-----------+-----------

In [57]:
# Peak (max share) and lean (min share) quarter per country, for every year
# column of df_c plus an 'Overall' entry.
max_quarters_c = {}
min_quarters_c = {}

# NOTE(review): this rebinds the module-level `years` to the year columns of
# df_c (an Index without 2014). Kept as-is because later cells may rely on it,
# but earlier cells re-run after this one will see the shortened list.
years = df_c.columns[1:]

for _, row in df_c.iterrows():
    country = row['Country']
    max_quarters_c[country] = {}
    min_quarters_c[country] = {}

    yearly_shares = []  # quarterly shares of the years that have data
    for year in years:
        entry = row[year]
        if not entry:
            # find_data returned None: country not listed for that year.
            max_quarters_c[country][year] = None
            min_quarters_c[country][year] = None
            continue

        shares = entry[1:]  # drop the leading total, keep the quarter shares
        yearly_shares.append(shares)
        # argmax/argmin are 0-based; quarters are reported 1-based.
        max_quarters_c[country][year] = f"Quarter {np.argmax(shares) + 1}"
        min_quarters_c[country][year] = f"Quarter {np.argmin(shares) + 1}"

    if yearly_shares:
        # 'Overall' is the unweighted mean of the per-year shares.
        avg_shares = np.mean(yearly_shares, axis=0)
        max_quarters_c[country]['Overall'] = f"Quarter {np.argmax(avg_shares) + 1}"
        min_quarters_c[country]['Overall'] = f"Quarter {np.argmin(avg_shares) + 1}"
    else:
        # A country listed only in 2014 has no data in any tabulated year;
        # averaging an empty list gives NaN, which would be reported as a
        # bogus "Quarter 1" for both peak and lean.
        max_quarters_c[country]['Overall'] = None
        min_quarters_c[country]['Overall'] = None

# These names replace the region-level frames of the same name; the print
# cells that follow read them.
max_quarters_df = pd.DataFrame(max_quarters_c).T
min_quarters_df = pd.DataFrame(min_quarters_c).T


In [58]:
# Country-level peak quarters rendered as an ASCII grid, followed by blank lines.
print(
    "Table 3: Table showing the quarter with maximum tourists going to each country for each year and overall:",
    tabulate(max_quarters_df, headers='keys', tablefmt='grid'),
    "\n",
    sep="\n",
)

Table 3: Table showing the quarter with maximum tourists going to each country for each year and overall:
+----------------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+
|                | 2016      | 2017      | 2019      | 2020      | 2021      | 2022      | Overall   |
| Afghanistan    | Quarter 1 | Quarter 1 | Quarter 4 | Quarter 1 | Quarter 1 | Quarter 4 | Quarter 1 |
+----------------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+
| Argentina      | Quarter 1 | Quarter 1 | Quarter 1 | Quarter 1 | Quarter 1 | Quarter 4 | Quarter 1 |
+----------------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+
| Australia      | Quarter 4 | Quarter 4 | Quarter 1 | Quarter 1 | Quarter 2 | Quarter 4 | Quarter 1 |
+----------------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+
| Austria        | Quarter 1 | Quarter 1 | Quarter 2 | Quarter 1 | Qua

In [59]:
# Country-level lean quarters rendered as an ASCII grid.
# Numbered 4 so the captions run 1-4 without a gap (it was labelled "Table 5").
print("\nTable 4: Table showing the quarter with minimum tourists going to each country for each year and overall:")
print(tabulate(min_quarters_df, headers='keys', tablefmt='grid'))


Table 5: Table showing the quarter with minimum tourists going to each country for each year and overall:
+----------------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+
|                | 2016      | 2017      | 2019      | 2020      | 2021      | 2022      | Overall   |
| Afghanistan    | Quarter 2 | Quarter 2 | Quarter 2 | Quarter 2 | Quarter 3 | Quarter 1 | Quarter2  |
+----------------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+
| Argentina      | Quarter 2 | Quarter 2 | Quarter 2 | Quarter 2 | Quarter 2 | Quarter 1 | Quarter2  |
+----------------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+
| Australia      | Quarter 2 | Quarter 2 | Quarter 3 | Quarter 3 | Quarter 1 | Quarter 1 | Quarter3  |
+----------------+-----------+-----------+-----------+-----------+-----------+-----------+-----------+
| Austria        | Quarter 2 | Quarter 2 | Quarter 1 | Quarter 2 | Qu