In [None]:
import requests
import pandas as pd

def download_file(file_id, destination):
    """
    Downloads a file from Google Drive given its file ID.

    The file is first requested without confirmation. If that response sets a
    cookie whose name starts with 'confirm', the request is repeated with the
    cookie's value passed as the `confirm` parameter (presumably Google
    Drive's interstitial for files it will not serve directly -- confirm
    against current Drive behaviour).

    Parameters:
        file_id (str): The ID of the file on Google Drive.
        destination (str): The path where the file will be saved.

    Raises:
        requests.HTTPError: If the final response has a 4xx/5xx status. In
            that case nothing is written to `destination`.
    """
    URL = "https://drive.google.com/uc?export=download"
    # The context manager closes the session's pooled connections on exit.
    with requests.Session() as session:
        response = session.get(URL, params={'id': file_id}, stream=True)
        token = next((v for k, v in response.cookies.items() if k.startswith('confirm')), None)
        if token:
            # Release the unread first response before issuing the confirmed request.
            response.close()
            response = session.get(URL, params={'id': file_id, 'confirm': token}, stream=True)
        try:
            # Fail loudly instead of saving an HTTP error page as if it were the file.
            response.raise_for_status()
            with open(destination, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip empty keep-alive chunks
                        f.write(chunk)
        finally:
            response.close()
    print(f"Downloaded {destination}")

# Google Drive file IDs and the local paths they are saved to.
# The two lists are paired by position: file_ids[i] is written to destinations[i].
file_ids = [
    '16boftNUZ2XxXosnoHoluUcSbLz2XzNXT',
    '1PCZAdl7t2wyIapwoBAIDlPDsEU0xhiJJ',
]
destinations = [
    'data_1.csv',
    'data_2.csv',
]

# Fetch every file to its paired destination, one after another
for file_id, destination in zip(file_ids, destinations):
    download_file(file_id=file_id, destination=destination)

# Read each downloaded CSV, then discard rows in which every cell is missing
dfs = [pd.read_csv(csv_path) for csv_path in destinations]
dfs = [frame.dropna(how='all') for frame in dfs]

# Source column name -> name used in the combined output
columns_mapping = {
    'placeName': 'Country',
    'Date:Annual_Consumption_Electricity': 'Date_of_Consumption_and_Loss_Electricity',
    'Value:Annual_Consumption_Electricity': 'Electricity_Consumption',
    # Renamed here, but not among the columns kept by the filter below
    'Date:Annual_Loss_Electricity': 'Date_of_Loss_Electricity',
    'Value:Annual_Loss_Electricity': 'Loss_electricity',
    'Date:Annual_Emissions_CarbonDioxide_ElectricityGeneration': 'Year_of_Emissions_of_CarbonDioxide_Electricity',
    'Value:Annual_Emissions_CarbonDioxide_ElectricityGeneration': 'Emissions_CarbonDioxide_ElectricityGeneration'
}

# Apply the new names, then keep only the listed columns
# (filter(items=...) ignores any listed column a frame does not have)
dfs = [frame.rename(columns=columns_mapping) for frame in dfs]
dfs = [
    frame.filter(items=[
        'Country',
        'Date_of_Consumption_and_Loss_Electricity',
        'Electricity_Consumption',
        'Loss_electricity',
        'Year_of_Emissions_of_CarbonDioxide_Electricity',
        'Emissions_CarbonDioxide_ElectricityGeneration',
    ])
    for frame in dfs
]

# Stack the frames vertically and renumber the rows from 0
df_combined = pd.concat(dfs)
df_combined = df_combined.reset_index(drop=True)

# Function to format numbers
def format_number(x):
    """
    Formats a number as a string with two decimals and a magnitude suffix.

    The suffix is chosen from the absolute value, so negative numbers are
    abbreviated the same way as positive ones (e.g. -2500000 -> "-2.50M").

    Parameters:
        x: The number to format. Missing values (as detected by pd.isna)
            are returned unchanged.

    Returns:
        The value divided by 1e9 / 1e6 / 1e3 with a "B" / "M" / "K" suffix
        when its magnitude reaches that threshold, otherwise the value itself
        with two decimals; or `x` unchanged when it is missing.
    """
    if pd.isna(x):
        return x
    magnitude = abs(x)
    # Thresholds are checked from largest to smallest so the first match wins.
    for threshold, suffix in ((1e9, "B"), (1e6, "M"), (1e3, "K")):
        if magnitude >= threshold:
            return f"{x / threshold:.2f}{suffix}"
    return f"{x:.2f}"

# Replace the numeric value columns with their abbreviated string form
for column in (
    'Electricity_Consumption',
    'Loss_electricity',
    'Emissions_CarbonDioxide_ElectricityGeneration',
):
    formatted = df_combined[column].apply(format_number)
    df_combined[column] = formatted

# Cast the date/year columns to pandas' nullable integer dtype ('Int64')
for year_column in (
    'Date_of_Consumption_and_Loss_Electricity',
    'Year_of_Emissions_of_CarbonDioxide_Electricity',
):
    df_combined[year_column] = df_combined[year_column].astype('Int64')

# Lift pandas' display limits so the whole table is rendered
for option_name in ('display.max_rows', 'display.max_columns'):
    pd.set_option(option_name, None)

# Show the result in the notebook and write it to disk without the index column
display(df_combined)
df_combined.to_csv('combined_data_formatted.csv', index=False)