In [7]:
import requests
import zipfile
import io
import pandas as pd
import os

# URL for the World Development Indicators zip file
url = 'https://databankfiles.worldbank.org/public/ddpext_download/WDI_CSV.zip'

# Step 1: Download the zip file
# Pass a timeout so a stalled connection raises instead of hanging the kernel.
# In requests the timeout bounds the connect and each socket read, not the
# total download time, so a large archive can still finish.
response = requests.get(url, timeout=60)
response.raise_for_status()  # Check for request errors

dataframes = {}  # Maps file stem (e.g. 'WDICSV') -> DataFrame

# Step 2: Read every CSV in the archive into a DataFrame
with zipfile.ZipFile(io.BytesIO(response.content)) as z:
    # List files in the zip archive
    file_list = z.namelist()
    print("Files in the zip archive:", file_list)

    # Iterate through the files in the archive
    for file_name in file_list:
        # Only process CSV files
        if file_name.endswith('.csv'):
            # File name without directory or extension; used as the dictionary key
            var_name = os.path.splitext(os.path.basename(file_name))[0]

            # Read the CSV file into a DataFrame
            with z.open(file_name) as f:
                df = pd.read_csv(f)

            # Store the DataFrame in the dictionary (works for every file name)
            dataframes[var_name] = df
            # Also bind it as a module-level variable (not an environment variable).
            # Stems that are not valid identifiers, such as 'WDIcountry-series',
            # cannot be typed as a bare name; use dataframes[...] for those.
            globals()[var_name] = df

# Step 3: Display the first few rows of each DataFrame
for var_name, df in dataframes.items():
    print(f"DataFrame for {var_name}:")
    print(df.head())

# Example access
# Frames are reachable through the dictionary, and through a global name when
# the file stem is a valid identifier
print(WDICSV.head())

Files in the zip archive: ['WDICSV.csv', 'WDICountry.csv', 'WDISeries.csv', 'WDIcountry-series.csv', 'WDIfootnote.csv', 'WDIseries-time.csv']
DataFrame for WDICSV:
                  Country Name Country Code  \
0  Africa Eastern and Southern          AFE   
1  Africa Eastern and Southern          AFE   
2  Africa Eastern and Southern          AFE   
3  Africa Eastern and Southern          AFE   
4  Africa Eastern and Southern          AFE   

                                      Indicator Name     Indicator Code  1960  \
0  Access to clean fuels and technologies for coo...     EG.CFT.ACCS.ZS   NaN   
1  Access to clean fuels and technologies for coo...  EG.CFT.ACCS.RU.ZS   NaN   
2  Access to clean fuels and technologies for coo...  EG.CFT.ACCS.UR.ZS   NaN   
3            Access to electricity (% of population)     EG.ELC.ACCS.ZS   NaN   
4  Access to electricity, rural (% of rural popul...  EG.ELC.ACCS.RU.ZS   NaN   

   1961  1962  1963  1964  1965  ...       2014       2015       2

In [8]:
# Everything after the first four columns is treated as a data column
data_columns = WDICSV.columns[4:]

# Keep only the rows that hold a value in every one of those columns
filtered_df = WDICSV.dropna(axis=0, how='any', subset=data_columns)

# Preview the top of the result
preview = filtered_df.head()
print(preview)

# Persist the result to a CSV in the working directory, without the index
filtered_df.to_csv('Filtered_WDI_CSV.csv', index=False)

                    Country Name Country Code  \
56   Africa Eastern and Southern          AFE   
57   Africa Eastern and Southern          AFE   
58   Africa Eastern and Southern          AFE   
145  Africa Eastern and Southern          AFE   
472  Africa Eastern and Southern          AFE   

                                        Indicator Name  Indicator Code  \
56   Age dependency ratio (% of working-age populat...     SP.POP.DPND   
57   Age dependency ratio, old (% of working-age po...  SP.POP.DPND.OL   
58   Age dependency ratio, young (% of working-age ...  SP.POP.DPND.YG   
145  Charges for the use of intellectual property, ...  BM.GSR.ROYL.CD   
472                            GDP (constant 2015 US$)  NY.GDP.MKTP.KD   

             1960          1961          1962          1963          1964  \
56   8.959460e+01  8.987337e+01  9.019172e+01  9.057468e+01  9.095272e+01   
57   5.626944e+00  5.598776e+00  5.571718e+00  5.549702e+00  5.534236e+00   
58   8.366871e+01  8.393699e+

In [13]:
import requests
import zipfile
import io
import pandas as pd
import os

# URL for the World Development Indicators zip file
url = 'https://databankfiles.worldbank.org/public/ddpext_download/WDI_CSV.zip'

# Step 1: Download the zip file
# Pass a timeout so a stalled connection raises instead of hanging the kernel.
# In requests the timeout bounds the connect and each socket read, not the
# total download time, so a large archive can still finish.
response = requests.get(url, timeout=60)
response.raise_for_status()  # Check for request errors

dataframes = {}  # Maps file stem (e.g. 'WDICSV') -> DataFrame

with zipfile.ZipFile(io.BytesIO(response.content)) as z:
    # List files in the zip archive
    file_list = z.namelist()
    print("Files in the zip archive:", file_list)

    # Iterate through the files in the archive
    for file_name in file_list:
        # Only process CSV files
        if file_name.endswith('.csv'):
            # File name without directory or extension; used as the dictionary key
            var_name = os.path.splitext(os.path.basename(file_name))[0]

            # Read the CSV file into a DataFrame
            with z.open(file_name) as f:
                df = pd.read_csv(f)

            # Store the DataFrame in the dictionary (works for every file name)
            dataframes[var_name] = df
            # Also bind it as a module-level variable (not an environment variable).
            # Stems that are not valid identifiers, such as 'WDIcountry-series',
            # cannot be typed as a bare name; use dataframes[...] for those.
            globals()[var_name] = df

# Step 2: Filter WDICSV to get filtered_df
# WDICSV is the global bound by the loop above; columns from position 4 onward
# are treated as the data columns, and rows missing any of them are dropped.
data_columns = WDICSV.columns[4:]
filtered_df = WDICSV.dropna(subset=data_columns)

# Display the first few rows of the filtered DataFrame
print(filtered_df.head())

# Save the filtered DataFrame to a new CSV file (optional)
filtered_df.to_csv('Filtered_WDI_CSV.csv', index=False)

# Step 3: Get unique Indicator Codes from the filtered DataFrame
unique_indicators = filtered_df['Indicator Code'].unique()

# Step 4: Filter the remaining CSV files based on unique Indicator Codes
def filter_dataframe_by_indicator_code(df, column_name, indicator_codes=None):
    """Return the rows of ``df`` whose ``column_name`` value is a wanted indicator code.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to filter.
    column_name : str
        Name of the column in ``df`` that holds the indicator/series code.
    indicator_codes : list-like, optional
        Codes to keep. When omitted, the module-level ``unique_indicators``
        is used, which preserves the original two-argument behaviour.

    Returns
    -------
    pandas.DataFrame or None
        The matching rows, or ``None`` (after printing a message) when
        ``column_name`` is not a column of ``df``.
    """
    if column_name not in df.columns:
        print(f"No '{column_name}' column found in DataFrame.")
        return None

    # Resolve the fallback only after the column check, so a missing column
    # never touches the global (same evaluation order as before).
    if indicator_codes is None:
        indicator_codes = unique_indicators

    return df[df[column_name].isin(indicator_codes)]

filtered_dataframes = {}  # Maps table name -> rows matching the kept indicator codes

# Supporting tables to filter; WDICSV itself is handled separately.
supporting_tables = ['WDICountry', 'WDISeries', 'WDIcountry-series', 'WDIfootnote', 'WDIseries-time']

# Column names that may hold the indicator code, tried in order.
# NOTE(review): 'Series Code' (with a space) is included because WDISeries.csv
# appears to use that spelling; confirm against the downloaded file.
code_column_candidates = ['Indicator Code', 'Series Code', 'SeriesCode']

for var_name, df in dataframes.items():
    if var_name == 'WDICSV':
        continue  # Skip since we've already filtered it
    if var_name not in supporting_tables:
        continue

    # Use the first candidate present; fall back to 'SeriesCode' so that a table
    # with no code column is still reported by the helper and skipped.
    column_name = next(
        (candidate for candidate in code_column_candidates if candidate in df.columns),
        'SeriesCode',
    )

    # Use a dedicated name here: reusing `filtered_df` would overwrite the
    # filtered WDICSV frame produced earlier in this cell.
    table_filtered = filter_dataframe_by_indicator_code(df, column_name)
    if table_filtered is not None:
        filtered_dataframes[var_name] = table_filtered
        # Save the filtered DataFrame to a new CSV file (optional)
        output_name = f"Filtered_{var_name}.csv"
        table_filtered.to_csv(output_name, index=False)
        print(f"Filtered data saved to {output_name}")

# Display the first few rows of the filtered DataFrames
for var_name, df in filtered_dataframes.items():
    print(f"Filtered DataFrame for {var_name}:")
    print(df.head())


Files in the zip archive: ['WDICSV.csv', 'WDICountry.csv', 'WDISeries.csv', 'WDIcountry-series.csv', 'WDIfootnote.csv', 'WDIseries-time.csv']
                    Country Name Country Code  \
56   Africa Eastern and Southern          AFE   
57   Africa Eastern and Southern          AFE   
58   Africa Eastern and Southern          AFE   
145  Africa Eastern and Southern          AFE   
472  Africa Eastern and Southern          AFE   

                                        Indicator Name  Indicator Code  \
56   Age dependency ratio (% of working-age populat...     SP.POP.DPND   
57   Age dependency ratio, old (% of working-age po...  SP.POP.DPND.OL   
58   Age dependency ratio, young (% of working-age ...  SP.POP.DPND.YG   
145  Charges for the use of intellectual property, ...  BM.GSR.ROYL.CD   
472                            GDP (constant 2015 US$)  NY.GDP.MKTP.KD   

             1960          1961          1962          1963          1964  \
56   8.959460e+01  8.987337e+01  9.019172