# How the variable names are shortened:
    1. Remove redundant prefixes (e.g., Weekly_1_)
    2. Abbreviate geographic locations (e.g., East Coast (PADD 1) -> PADD1)
    3. Abbreviate common terms (e.g., Crude Oil -> Crude, Ending Stocks -> Stocks)
    4. Remove unit descriptions (e.g., Thousand Barrels)

In [4]:
import pandas as pd
import re

def shorten_column_names(df):
    """Shorten long, descriptive column headers of ``df`` in place.

    Each header such as
    ``'Weekly U.S. Ending Stocks of Crude Oil (Thousand Barrels)'`` is
    reduced to a compact identifier (``'US_Stocks_Crude'``) by:

    1. removing redundant prefixes (``Weekly_1_``, ``PSW01_Weekly_1_``,
       ``Weekly ``);
    2. abbreviating geographic locations;
    3. dropping unit descriptions and other parenthetical content
       (``(Percent)`` becomes a ``Pct`` suffix);
    4. abbreviating common phrases;
    5. dropping connecting words (of / in / at / and);
    6. replacing every non-alphanumeric run with a single underscore.

    Headers that are already short (under 20 characters, fewer than
    3 spaces, and not containing ``Weekly``) and non-string labels are
    left untouched.

    Args:
        df: Object with an assignable ``columns`` attribute (a pandas
            DataFrame in this file). Its columns are replaced in place.

    Returns:
        The same ``df`` object, with renamed columns.

    Note:
        Different headers can map to the same short name (for example
        'Refiner' and 'Reformulated' both become 'Ref'); no
        de-duplication is performed.
    """
    # Leading sheet/series prefixes such as "Weekly_1_" or "PSW01_Weekly_2_".
    prefix_re = re.compile(r'^(Weekly_\d+_|PSW\d+_Weekly_\d+_)')

    # 2. Geographic location abbreviations
    geo_map = {
        'East Coast (PADD 1)': 'PADD1',
        'Midwest (PADD 2)': 'PADD2',
        'Gulf Coast (PADD 3)': 'PADD3',
        'Rocky Mountain (PADD 4)': 'PADD4',
        'Rocky Mountains (PADD 4)': 'PADD4',
        'West Coast (PADD 5)': 'PADD5',
        'U.S.': 'US',
        'Alaska': 'AK',
        'Lower 48 States': 'L48',
    }

    # 4. Common phrase abbreviations
    replacements = {
        'Ending Stocks': 'Stocks',
        'Crude Oil': 'Crude',
        'Distillate Fuel Oil': 'Distillate',
        'Residual Fuel Oil': 'Residual',
        'Kerosene-Type Jet Fuel': 'JetFuel',
        'Finished Motor Gasoline': 'FinGas',  # Distinguish finished gasoline
        'Total Gasoline': 'TotGas',           # Distinguish total gasoline
        'Motor Gasoline': 'Gas',              # General gasoline
        'Gasoline Blending Components': 'GasBlend',
        'Fuel Ethanol': 'Ethanol',
        'Petroleum Products': 'PetProd',
        'Natural Gas Plant Liquids': 'NGPL',
        'Oxygenates': 'Oxy',
        'Renewable Fuels': 'Renew',
        'Refiner and Blender': 'RefBl',
        'Refiner, Blender, and Gas Plant': 'RefBlGas',
        'Refiner': 'Ref',
        'Net Production': 'NetProd',
        'Field Production': 'FieldProd',
        'Gross Inputs': 'GrossIn',
        'Net Input': 'NetIn',
        'Net Imports': 'NetImp',
        'Imports': 'Imp',
        'Exports': 'Exp',
        'Stock Change': 'StkChg',
        'Product Supplied': 'ProdSup',
        'Supply Adjustment': 'SupAdj',
        'Unaccounted for': 'Unacc',
        'Operable Crude Oil Distillation Capacity': 'Cap',
        'Percent Utilization of Refinery Operable Capacity': 'UtilPct',
        'Strategic Petroleum Reserve': 'SPR',
        'excluding': 'Ex',
        'Excluding': 'Ex',
        'less': 'Less',
        'Other Oils': 'Other',
        'Unfinished Oils': 'Unfin',
        'Propane and Propylene': 'Propane',
        '4-Week Avg': '4W',
        'Reformulated': 'Ref',
        'Conventional': 'Conv',
        'Greater than': 'GT',
        '0 to 15 ppm Sulfur': 'LowS',
        '15 to 500 ppm Sulfur': 'MedS',
        '500 ppm Sulfur': 'HighS',
        'Processing Gain': 'ProcGain',
    }

    # One alternation applied in a single pass, longest phrase first, so a
    # short phrase ('Crude Oil', 'Refiner') can no longer rewrite the inside
    # of a longer one ('Operable Crude Oil Distillation Capacity',
    # '... Refinery Operable Capacity') before the longer one is tried.
    # The look-arounds stop a phrase from matching in the middle of a longer
    # word ('Refiner' inside 'Refineries'); an upper-case letter may follow
    # because the 'Pct' unit suffix is glued directly onto the last word.
    phrase_re = re.compile(
        r'(?<![A-Za-z0-9])(?:'
        + '|'.join(
            re.escape(phrase)
            for phrase in sorted(replacements, key=len, reverse=True)
        )
        + r')(?![a-z])'
    )

    def clean_name(name):
        # Non-string labels (e.g. integer column positions) cannot be
        # shortened; pass them through instead of failing on len().
        if not isinstance(name, str):
            return name

        # Skip names that are already short: under 20 characters, fewer
        # than 3 spaces, and not carrying a "Weekly" prefix.
        if len(name) < 20 and name.count(' ') < 3 and 'Weekly' not in name:
            return name

        # 1. Remove redundant prefixes
        name = prefix_re.sub('', name)
        name = re.sub(r'^Weekly ', '', name)

        # 2. Geographic locations (must run before parentheses are stripped,
        #    because the PADD keys include their parenthetical part)
        for long_form, short_form in geo_map.items():
            name = name.replace(long_form, short_form)

        # 3. Remove units and parenthetical content
        name = re.sub(r'\s*\(Thousand Barrels.*?\)', '', name)
        name = re.sub(r'\s*\(Percent\)', 'Pct', name)
        name = re.sub(r'\s*\(.*?\)', '', name)  # Remove other parenthetical content

        # 4. Common phrase abbreviations
        name = phrase_re.sub(lambda m: replacements[m.group(0)], name)
        # 'UtilPct' followed by the '(Percent)' unit suffix would read
        # 'UtilPctPct'; keep a single marker.
        name = re.sub(r'(?:Pct){2,}', 'Pct', name)

        name = name.replace('Total ', '')  # Remove remaining "Total"

        # 5. Clean up connecting words
        for connector in (' of ', ' in ', ' at ', ' and '):
            name = name.replace(connector, ' ')

        # 6. Format: non-alphanumerics become underscores, runs collapse,
        #    and leading/trailing underscores are trimmed
        name = re.sub(r'[^a-zA-Z0-9]', '_', name)
        name = re.sub(r'_+', '_', name)
        return name.strip('_')

    # Apply transformation
    df.columns = [clean_name(c) for c in df.columns]
    return df

# Load the cleaned dataset, shorten its column headers, and write the result
# to a new CSV (without the index column).
df = shorten_column_names(pd.read_csv('cleaned_oil_prediction_data.csv'))
df.to_csv('shortened_oil_data.csv', index=False)
