In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import skew, kurtosis

In [None]:
""" READING CSV FILE """
# Load the dataset from 'gdp.csv'; the relative path is resolved against the
# kernel's current working directory.
df_world = pd.read_csv('gdp.csv')

In [None]:
# Preview the first rows to check that the file was parsed as expected.
df_world.head()

In [None]:
# Show column dtypes and non-null counts for the loaded frame.
df_world.info() 

In [None]:
# Count the number of occurrences of each distinct value in the 'Country' column.
country_counts = df_world['Country'].value_counts()

# Display the count for each country.
print(country_counts)

In [None]:
# Transposing turns every column into a row, so `duplicated()` flags each
# column whose values repeat those of an earlier column.
duplicateCL = df_world.transpose().duplicated()

# Use the boolean flags to pick out the labels of the repeated columns.
duplicate_col_names = df_world.columns[duplicateCL]

print("Duplicate columns:", duplicate_col_names.tolist())

In [None]:
# Count the null values in each column of the dataset.
df_world.isnull().sum()

In [None]:
# Boolean mask that is True for every row holding a null in at least one column.
null_row_mask = df_world.isnull().any(axis=1)

# Keep only the flagged rows and print them beneath a heading line.
rows_with_null = df_world[null_row_mask]
print("Rows with null values:", rows_with_null, sep="\n")

In [None]:
# Inspect the last five rows of the dataset.
df_world.tail(5)

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per column.
df_world.describe()

In [None]:
# Calculate descriptive statistics on the numeric columns only.
# df_world also holds the text column 'Country'; feeding it to mean/median/std/
# corr raises a TypeError on pandas >= 2.0, and scipy's skew/kurtosis cannot
# work on an object array, so the non-numeric columns are filtered out first.
gdp_numeric = df_world.select_dtypes(include='number')

gdp_mean = gdp_numeric.mean()
gdp_median = gdp_numeric.median()
gdp_std = gdp_numeric.std()

# nan_policy='omit' ignores missing values column by column instead of
# propagating NaN into the result.
gdp_skew = skew(gdp_numeric, axis=0, nan_policy='omit')
gdp_kurtosis = kurtosis(gdp_numeric, axis=0, nan_policy='omit')

# Correlation matrix between the numeric columns.
correlation_matrix = gdp_numeric.corr()

In [None]:
# Work on a copy so that columns added later do not alter df_world.
df_modified = df_world.copy()

In [None]:
# Row-wise mean over every column after the first one, stored as 'Average_GDP'.
# NOTE(review): assumes column 0 is 'Country' and all remaining columns are
# numeric yearly values — confirm against the CSV layout.
df_modified['Average_GDP'] = df_modified.iloc[:, 1:].mean(axis=1)
print(df_modified)


In [None]:
# Get the five rows with the largest 'Average_GDP' values.
top_5_highest_gdp = df_modified.nlargest(5, 'Average_GDP')

# Display the selection as the cell output.
top_5_highest_gdp

In [None]:
# Get the five rows with the smallest 'Average_GDP' values and display them.
bottom_5_lowest_gdp = df_modified.nsmallest(5, 'Average_GDP')
bottom_5_lowest_gdp

In [None]:

# Categorical graph: grouped bar chart comparing average, first-year and
# last-year GDP for each country in the given table.
def Bar_Top_5(data, first_year='1980', last_year='2028'):
    """Draw a grouped bar chart with three bars per row of ``data``.

    For every row, three adjacent bars are drawn on the current pyplot
    figure: the row's ``'Average_GDP'`` value, its ``first_year`` value and
    its ``last_year`` value. The chart is then shown with ``plt.show()``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with one row per country. It must provide the columns
        ``'Country'`` (used for the x tick labels), ``'Average_GDP'``,
        ``first_year`` and ``last_year``.
    first_year, last_year : str, optional
        Labels of the two yearly columns to compare against the average.
        The defaults (``'1980'`` and ``'2028'``) keep the previous
        hard-coded behaviour.

    Raises
    ------
    KeyError
        If any of the required columns is missing from ``data``.
    """
    required = ['Country', 'Average_GDP', first_year, last_year]
    missing = [col for col in required if col not in data]
    if missing:
        raise KeyError(f"Bar_Top_5: missing column(s) {missing}")

    bar_width = 0.25
    index = range(len(data))

    # Each series is shifted by one bar width so the three bars of a country
    # sit side by side instead of overlapping.
    plt.bar(index, data['Average_GDP'], bar_width, label='Average GDP')
    plt.bar([i + bar_width for i in index], data[first_year], bar_width,
            label=str(first_year))
    plt.bar([i + 2 * bar_width for i in index], data[last_year], bar_width,
            label=str(last_year))

    # Adding labels and title
    plt.xlabel('Countries')
    plt.ylabel('GDP Value')
    plt.title('Top 5 Countries by Average GDP')
    # Centre each tick under the middle bar of its group.
    plt.xticks([i + bar_width for i in index], data['Country'])
    plt.legend()

    # Show the plot
    plt.show()
# Draw the chart for the five countries with the highest average GDP.
Bar_Top_5(top_5_highest_gdp)


In [None]:
# Row labels of the countries with the highest and the lowest average GDP.
highest_index = df_modified['Average_GDP'].idxmax()
lowest_index = df_modified['Average_GDP'].idxmin()

# Build a two-row frame (highest first, lowest second) indexed by country
# name. set_index is chained rather than applied in place so the frame is
# never mutated after it has been derived from df_modified.
result_df = df_modified.loc[[highest_index, lowest_index]].set_index('Country')

# Transpose so each country becomes a column and each original column a row.
# errors='ignore' makes the drop match its intent ("drop the Average_GDP row
# if it exists"): a plain drop raises KeyError when the label is absent.
df_transposed = result_df.T.drop('Average_GDP', errors='ignore')

print(df_transposed)

In [None]:
# Relational graph: line plot showing GDP trends over time for selected countries.
def plot_gdp_trends(data, countries=('Luxembourg', 'Afghanistan')):
    """Plot one GDP line per country against the index of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table whose index supplies the x values and whose columns are
        country names holding the values to plot.
    countries : sequence of str, optional
        Columns of ``data`` to plot, in drawing order. The default keeps the
        previously hard-coded pair ('Luxembourg', 'Afghanistan').

    Raises
    ------
    KeyError
        If any requested country is not a column of ``data``.
    """
    missing = [name for name in countries if name not in data.columns]
    if missing:
        raise KeyError(f"plot_gdp_trends: missing column(s) {missing}")

    plt.figure(figsize=(10, 6))
    for name in countries:
        plt.plot(data.index, data[name], label=name, marker='o')

    # Adding title and labels; the title lists the plotted countries.
    plt.title('GDP Over the Years: ' + ' vs. '.join(str(name) for name in countries))
    plt.xlabel('Year')
    plt.ylabel('GDP (in USD)')
    plt.xticks(rotation=45)
    plt.legend()

    # Show the plot
    plt.show()


# Pass the actual columns of df_transposed instead of relying on hard-coded
# names, so the plot still works if different countries are selected.
plot_gdp_trends(df_transposed, countries=list(df_transposed.columns))

In [None]:
# Statistics for the five countries with the highest average GDP.
# Column 0 ('Country') is skipped positionally, and the derived 'Average_GDP'
# column is dropped so that only the yearly columns feed the per-year
# statistics and the year-to-year correlation.
gdp_data = top_5_highest_gdp.iloc[:, 1:].drop(columns='Average_GDP', errors='ignore')

# Basic descriptive statistics (count, mean, std, quartiles) per year.
descriptive_stats = gdp_data.describe()

# Mean, median, standard deviation, skewness and kurtosis for each year.
stats_summary = pd.DataFrame({
    'Mean': gdp_data.mean(),
    'Median': gdp_data.median(),
    'Standard Deviation': gdp_data.std(),
    'Skewness': gdp_data.apply(skew),
    'Kurtosis': gdp_data.apply(kurtosis)
})

# Correlation between the yearly columns across the selected countries.
# NOTE: this rebinds `correlation_matrix`, which an earlier cell computed on
# the full dataset.
correlation_matrix = gdp_data.corr()
print("\nCorrelation Matrix:\n", correlation_matrix)






In [None]:
# Statistical graph: box plot of the GDP distribution, one box per plotted column.
box_ax = df_world.plot(kind='box', rot=45, color='green', figsize=(10, 6))

# Label the axes object returned by pandas directly.
box_ax.set_title("Distribution of GDP Across Years or Countries")
box_ax.set_ylabel("GDP Value")
plt.tight_layout()

# Render the figure.
plt.show()