In [3]:
# Load the data from ./data/AI_index_db.csv
import numpy as np
import pandas as pd
import dataframe_image as dfi

# Read the AI index dataset from disk into a DataFrame
data = pd.read_csv(filepath_or_buffer='./data/AI_index_db.csv')

# Quick sanity check: print the leading rows (5 is head()'s default)
print(data.head(n=5))


                    Country  Talent  Infrastructure  Operating Environment  \
0  United States of America  100.00           94.02                  64.56   
1                     China   16.51          100.00                  91.57   
2            United Kingdom   39.65           71.43                  74.65   
3                    Canada   31.28           77.05                  93.94   
4                    Israel   35.76           67.58                  82.44   

   Research  Development  Government Strategy  Commercial  Total score  \
0    100.00       100.00                77.39      100.00       100.00   
1     71.42        79.97                94.87       44.02        62.92   
2     36.50        25.03                82.82       18.91        40.93   
3     30.67        25.78               100.00       14.88        40.19   
4     32.63        27.96                43.91       27.33        39.89   

         Region                Cluster  Income group   Political regime  
0      Ameri

In [23]:
# Get the top 5 countries by "Total score".
# .copy() gives an independent frame: adding the "Rank" column below to the bare
# .head() slice can otherwise trigger pandas' SettingWithCopyWarning.
top_5_countries = data.sort_values(by='Total score', ascending=False).head(5).copy()

# display only the columns "Country" and "Total score"
print(top_5_countries[['Country', 'Total score']])

# export png

# map row names to rank (1 = highest score). The range is sized from the frame
# so a dataset with fewer than 5 rows does not raise a length-mismatch error.
top_5_countries['Rank'] = range(1, len(top_5_countries) + 1)
# Reassign instead of inplace=True so the lineage of the variable stays explicit
top_5_countries = top_5_countries.set_index('Rank')
dfi.export(top_5_countries[['Country', 'Total score']], 'top_5_countries.png')

                    Country  Total score
0  United States of America       100.00
1                     China        62.92
2            United Kingdom        40.93
3                    Canada        40.19
4                    Israel        39.89


In [25]:
# Rank the class "Region" by the average "Total score" for each region

# Group the data by "Region"
grouped_data = data.groupby('Region')

# Average score per region. GroupBy.mean() divides by the number of non-missing
# scores, so a country with a missing "Total score" is not counted in the
# denominator (dividing the sum by the country count would understate the average).
# Sort once here and reuse the sorted result for both the printout and the export.
ranked_regions = grouped_data['Total score'].mean().sort_values(ascending=False)

# Display the ranked regions
print(ranked_regions)

# export png
# Turn "Region" back into a regular column and index the rows by 1-based rank
ranked_regions = ranked_regions.reset_index()
ranked_regions['Rank'] = range(1, len(ranked_regions) + 1)
ranked_regions = ranked_regions.set_index('Rank')
# set column names: blank header for the region column, descriptive one for the score
ranked_regions.columns = ['', 'Average Region Score']

dfi.export(ranked_regions, 'ranked_regions.png')

Region
Americas        29.031250
Asia-Pacific    25.792143
Europe          25.493103
Middle East     19.656667
Africa           6.426000
dtype: float64


In [20]:
# Summary table with one column per index ('Talent', 'Infrastructure',
# 'Operating Environment', 'Research', 'Development', 'Government Strategy',
# 'Commercial', 'Total score') and one row per statistic:
# - mean of the index
# - median of the index
# - standard deviation of the index
# - country holding the highest value of the index
# - country holding the lowest value of the index
# The table is exported as PNG, once whole and once split into two halves.

columns = ['Talent', 'Infrastructure', 'Operating Environment', 'Research', 'Development', 'Government Strategy', 'Commercial', 'Total score']
row_titles = ['Mean Value of the index', 'Median Value of the index', 'Standard Deviation of the index', 'Country with the highest value (100)', 'Country with the lowest value (0)']


def _index_summary(scores):
    """Return the five table entries for one index column, in `row_titles` order.

    The three statistics are formatted as strings with 2 decimal places; the
    last two entries are the "Country" values of the rows where `scores` is
    at its maximum and minimum.
    """
    best_row = data.loc[scores.idxmax()]
    worst_row = data.loc[scores.idxmin()]
    return [
        format(scores.mean(), '.2f'),
        format(scores.median(), '.2f'),
        format(scores.std(), '.2f'),
        best_row['Country'],
        worst_row['Country'],
    ]


# Empty frame (statistics as rows, indexes as columns) to be filled cell by cell
table = pd.DataFrame(index=row_titles, columns=columns)

for column in columns:
    for row_title, entry in zip(row_titles, _index_summary(data[column])):
        table.loc[row_title, column] = entry


# Full table in a single image
dfi.export(table, 'table.png')

# Same table split in 2 images: first four index columns, then the rest
left_half = table.iloc[:, :4]
right_half = table.iloc[:, 4:]
dfi.export(left_half, 'table1.png')
dfi.export(right_half, 'table2.png')


