# Data Pipeline - Mining company project
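This notebook automates the loading, processing, and saving of financial and operational KPIs for a mining operation, using a SAP FI export, rail transport data, and a WMS dispatch log. Processed results are written to the `outputs` folder.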

In [10]:
import pandas as pd

### Load data

In [18]:
# Load the simulated source extracts. The SAP FI export and the WMS dispatch
# log are semicolon-delimited CSV files; the rail data is an Excel workbook.
fi_df = pd.read_csv("sap_fi_export.csv", sep=";")
wms_df = pd.read_csv("wms_dispatch_log.csv", sep=";")
rail_df = pd.read_excel("rail_transport_data.xlsx")

### SAP FI Data Processing

In [21]:
# Total tonnage and operating cost per cost center. `as_index=False` keeps
# 'CostCenter' as a regular column, so no reset_index() is needed afterwards.
fi_summary = fi_df.groupby('CostCenter', as_index=False).agg({
    'TonsMoved': 'sum',
    'OperationCost': 'sum'
})

# Average cost per ton for each cost center. A cost center with zero total
# tonnage would otherwise produce inf in the exported KPI, so zero
# denominators are masked to NaN and the ratio is left missing instead.
fi_summary['AverageCostPerTon'] = (
    fi_summary['OperationCost']
    / fi_summary['TonsMoved'].mask(fi_summary['TonsMoved'] == 0)
)
fi_summary

Unnamed: 0,CostCenter,TonsMoved,OperationCost,AverageCostPerTon
0,FERROCARRIL,143472,4536821088,31621.648043
1,MINA_SUR,110610,2962266936,26781.185571
2,PUERTO_NORTE,112845,2828645814,25066.647295


### Rail Transport Processing

In [22]:
# Total cost per record: tons transported multiplied by the cost per ton.
# The new column is stored on rail_df itself.
rail_df['TotalCost'] = rail_df['TonsTransported'].mul(rail_df['CostPerTon'])

# Keep only the date, tonnage and total-cost columns for the rail summary.
rail_summary = rail_df.loc[:, ['Date', 'TonsTransported', 'TotalCost']]
rail_summary.head()

Unnamed: 0,Date,TonsTransported,TotalCost
0,2025-01-01,7636,75348000.92
1,2025-01-02,4059,60220541.7
2,2025-01-03,8279,90052835.54
3,2025-01-04,4722,38868481.92
4,2025-01-05,6314,65763467.0


### Port Data Processing

In [23]:
# Loading efficiency: loaded tons divided by loading time in hours.
# A record with a loading time of 0 would otherwise yield inf (and distort any
# later aggregate of this column), so zero durations are masked to NaN and the
# efficiency is left missing for those rows.
wms_df['LoadingEfficiency'] = (
    wms_df['LoadedTons']
    / wms_df['LoadingTimeHours'].mask(wms_df['LoadingTimeHours'] == 0)
)
wms_df.head()

Unnamed: 0,Date,VesselID,LoadedTons,LoadingTimeHours,Port,LoadingEfficiency
0,1/01/25,SHIP300,12557,17.0,PUERTO_NORTE,738.647059
1,2/01/25,SHIP301,48360,28.1,PUERTO_NORTE,1720.996441
2,3/01/25,SHIP302,12200,20.6,PUERTO_NORTE,592.23301
3,4/01/25,SHIP303,78497,18.1,PUERTO_NORTE,4336.850829
4,5/01/25,SHIP304,56975,19.1,PUERTO_NORTE,2982.984293


### Saving processed outputs

In [24]:
# Create the output folder; exist_ok=True makes this a no-op on re-runs.
import os

OUTPUT_DIR = "outputs"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Save each processed table as its own CSV. index=False leaves the pandas
# row index out of the files.
fi_summary.to_csv(os.path.join(OUTPUT_DIR, 'fi_summary.csv'), index=False)
rail_summary.to_csv(os.path.join(OUTPUT_DIR, 'rail_summary.csv'), index=False)
wms_df.to_csv(os.path.join(OUTPUT_DIR, 'wms_enriched.csv'), index=False)

# Status message (the original literal ended with a stray "/'").
print(f"✔ Data processed and saved in the '{OUTPUT_DIR}' folder")

✔ Data processed and saved in the 'outputs' folder/'
