# Load dataset

In [25]:
import pandas as pd


# CSV export read from the notebook's working directory (relative path).
file_path = "world_Bank_Panel_Data.csv"

# Raw wide-format frame; later cells reshape it into long form.
data = pd.read_csv(filepath_or_buffer=file_path)

# Reshape from wide to long

In [26]:

# Identifier columns stay fixed per row; every remaining column is unpivoted.
id_columns = [
    "Series Name",
    "Series Code",
    "Country Name",
    "Country Code",
    "Scale (Precision)",
]

# One output row per (identifier combination, original column): the original
# column label lands in "Year" and its cell in "Value".
data_long = data.melt(id_vars=id_columns, var_name="Year", value_name="Value")

# Clean Year column (keep only year numbers)

In [27]:

# Pull the first four-digit run out of each melted column label and store it as a
# nullable integer year; labels without four consecutive digits become <NA>.
# Casting to str first keeps the cell re-runnable: once "Year" is already Int64,
# the .str accessor would otherwise raise AttributeError on a second execution.
# expand=False makes extract return a Series rather than a one-column DataFrame.
year_text = data_long["Year"].astype(str).str.extract(r'(\d{4})', expand=False)
data_long = data_long.assign(Year=year_text.astype(float).astype("Int64"))



# Drop rows with missing values in important columns

In [28]:

# Coerce "Value" to numeric before filtering so that non-numeric placeholders
# (e.g. a ".." missing-data marker, which World Bank exports may contain --
# confirm against the CSV) become NaN and are dropped together with genuinely
# empty cells. Values that are already numeric pass through unchanged.
data_long = (
    data_long
    .assign(Value=lambda frame: pd.to_numeric(frame["Value"], errors="coerce"))
    # Keep only rows that name a country, have a parsed year and carry a value.
    .dropna(subset=["Country Name", "Year", "Value"])
)

# Sort for readability

In [29]:


# Order rows by country, then indicator, then year, renumbering the index from 0.
data_long = data_long.sort_values(
    by=["Country Name", "Series Name", "Year"],
    ignore_index=True,
)

# Preview the first ten rows.
print(data_long.head(10))


                                    Series Name        Series Code  \
0  Energy use (kg of oil equivalent per capita)  EG.USE.PCAP.KG.OE   
1  Energy use (kg of oil equivalent per capita)  EG.USE.PCAP.KG.OE   
2  Energy use (kg of oil equivalent per capita)  EG.USE.PCAP.KG.OE   
3  Energy use (kg of oil equivalent per capita)  EG.USE.PCAP.KG.OE   
4  Energy use (kg of oil equivalent per capita)  EG.USE.PCAP.KG.OE   
5  Energy use (kg of oil equivalent per capita)  EG.USE.PCAP.KG.OE   
6  Energy use (kg of oil equivalent per capita)  EG.USE.PCAP.KG.OE   
7  Energy use (kg of oil equivalent per capita)  EG.USE.PCAP.KG.OE   
8  Energy use (kg of oil equivalent per capita)  EG.USE.PCAP.KG.OE   
9  Energy use (kg of oil equivalent per capita)  EG.USE.PCAP.KG.OE   

  Country Name Country Code Scale (Precision)  Year   Value  
0    Indonesia          IDN       Unit (0.00)  1990  461.48  
1    Indonesia          IDN       Unit (0.00)  1991  477.03  
2    Indonesia          IDN       Unit (0.0

In [30]:
# Boolean mask selecting the rows of the GDP indicator (exact label match).
is_gdp_row = data_long["Series Name"].eq("GDP (constant 2015 US$)")
gdp_data = data_long.loc[is_gdp_row]

# Keeping only essential columns

In [31]:

# Drop the series/scale metadata; only country identifiers, year and value remain.
gdp_columns = ["Country Name", "Country Code", "Year", "Value"]
gdp_data = gdp_data.loc[:, gdp_columns]

In [32]:

# The generic "Value" column now only holds GDP, so give it a descriptive name.
gdp_data = gdp_data.rename({"Value": "GDP_constant_2015"}, axis="columns")

# Sort for readability

In [33]:

# Order by country, then year, and renumber the index from 0.
gdp_data = gdp_data.sort_values(by=["Country Name", "Year"], ignore_index=True)

# Preview the first ten rows.
print(gdp_data.head(10))


  Country Name Country Code  Year  GDP_constant_2015
0    Indonesia          IDN  1990       2.699151e+11
1    Indonesia          IDN  1991       2.885716e+11
2    Indonesia          IDN  1992       3.073216e+11
3    Indonesia          IDN  1993       3.272864e+11
4    Indonesia          IDN  1994       3.519637e+11
5    Indonesia          IDN  1995       3.808952e+11
6    Indonesia          IDN  1996       4.106743e+11
7    Indonesia          IDN  1997       4.299754e+11
8    Indonesia          IDN  1998       3.735338e+11
9    Indonesia          IDN  1999       3.764889e+11


# Check all available indicators

In [34]:

# Distinct indicator labels present in the long-format table.
series_names = data_long["Series Name"].unique()
print(series_names)

['Energy use (kg of oil equivalent per capita)' 'GDP (constant 2015 US$)'
 'International tourism, number of arrivals' 'Trade (% of GDP)']


# Selecting the indicators

In [35]:

# Exact "Series Name" labels of the four indicators kept for the panel.
GDP_SERIES = "GDP (constant 2015 US$)"
ENERGY_SERIES = "Energy use (kg of oil equivalent per capita)"
TRADE_SERIES = "Trade (% of GDP)"
TOURISM_SERIES = "International tourism, number of arrivals"

indicators = [GDP_SERIES, ENERGY_SERIES, TRADE_SERIES, TOURISM_SERIES]



# Filter and pivot

In [36]:

# Keep only rows whose indicator label is one of the selected series.
is_selected = data_long["Series Name"].isin(indicators)
filtered = data_long.loc[is_selected]

In [37]:
# Spread the indicators into columns: one row per (country, code, year) and one
# column per "Series Name". pivot_table aggregates with its default (mean) if a
# combination occurs more than once.
panel_wide = pd.pivot_table(
    filtered,
    values="Value",
    index=["Country Name", "Country Code", "Year"],
    columns="Series Name",
)

# Turn the index levels back into ordinary columns.
panel_data = panel_wide.reset_index()


In [38]:

# Map each full indicator label to a short, identifier-style column name.
short_names = {
    "GDP (constant 2015 US$)": "GDP_constant_2015",
    "Energy use (kg of oil equivalent per capita)": "Energy_use_per_capita",
    "Trade (% of GDP)": "Trade_percent_GDP",
    "International tourism, number of arrivals": "Tourism_arrivals",
}
panel_data = panel_data.rename(columns=short_names)

In [39]:

# Order by country, then year, and renumber the index from 0.
panel_data = panel_data.sort_values(by=["Country Name", "Year"], ignore_index=True)

# Preview the first ten rows.
print(panel_data.head(10))

Series Name Country Name Country Code  Year  Energy_use_per_capita  \
0              Indonesia          IDN  1990                 461.48   
1              Indonesia          IDN  1991                 477.03   
2              Indonesia          IDN  1992                 489.76   
3              Indonesia          IDN  1993                 534.33   
4              Indonesia          IDN  1994                 524.02   
5              Indonesia          IDN  1995                 574.76   
6              Indonesia          IDN  1996                 587.22   
7              Indonesia          IDN  1997                 595.39   
8              Indonesia          IDN  1998                 567.58   
9              Indonesia          IDN  1999                 587.05   

Series Name  GDP_constant_2015  Tourism_arrivals  Trade_percent_GDP  
0                 2.699151e+11               NaN              52.89  
1                 2.885716e+11               NaN              54.84  
2                 3

In [40]:
from linearmodels.panel import PanelOLS

# Setting panel index: move country and year into a two-level (entity, time)
# index, the layout the panel estimator used below works with.
# Guarded so the cell can be re-run: once the columns have been moved into the
# index, a second set_index call would raise KeyError.
panel_index = ["Country Name", "Year"]
if list(panel_data.index.names) != panel_index:
    panel_data = panel_data.set_index(panel_index)








# Drop rows with missing values

In [41]:

# Keep only observations where the outcome and all three regressors are present.
model_columns = [
    "GDP_constant_2015",
    "Energy_use_per_capita",
    "Trade_percent_GDP",
    "Tourism_arrivals",
]
panel_data = panel_data.dropna(subset=model_columns)

# Define dependent (y) and independent variables (X)

In [42]:



# Outcome: the GDP column, selected as a Series.
y = panel_data.loc[:, "GDP_constant_2015"]

# Regressors: the three remaining indicators, selected as a DataFrame
# (no constant column is added here).
regressor_names = ["Energy_use_per_capita", "Trade_percent_GDP", "Tourism_arrivals"]
X = panel_data.loc[:, regressor_names]

# Fixed Effects (by country)

In [43]:



# Panel regression of y on X with entity effects switched on; the entity is the
# first index level ("Country Name", set earlier in the notebook).
model_fe = PanelOLS(dependent=y, exog=X, entity_effects=True)

# fit() runs with its defaults; the summary printed below reports the
# covariance estimator as "Unadjusted".
results_fe = model_fe.fit()

print(results_fe.summary)


                          PanelOLS Estimation Summary                           
Dep. Variable:      GDP_constant_2015   R-squared:                        0.3648
Estimator:                   PanelOLS   R-squared (Between):              0.3815
No. Observations:                 100   R-squared (Within):               0.3648
Date:                Wed, Sep 24 2025   R-squared (Overall):              0.3805
Time:                        09:00:31   Log-likelihood                   -2628.2
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      17.612
Entities:                           5   P-value                           0.0000
Avg Obs:                       20.000   Distribution:                    F(3,92)
Min Obs:                       20.000                                           
Max Obs:                       20.000   F-statistic (robust):             17.612
                            