In [1]:
import pandas as pd

# Einlesen der Daten

In [2]:
# Locations of the two raw CSV inputs used by this notebook.
# Copper time series (read into df_copper below):
url_copper = (
    "https://raw.githubusercontent.com/andreashrb/Python_for_ML_Project/main/data/dataset_copper.csv"
)
# Economic indicators (read into df_economic below):
url_economic = (
    "https://raw.githubusercontent.com/andreashrb/Python_for_ML_Project/main/data/copper_economic.csv"
)

## Zeitreihendaten Kupferpreise

In [3]:
# Load the LME copper time series. The first CSV column becomes the index,
# ',' is treated as a thousands separator, and 'date' is parsed to datetimes.
df_copper = pd.read_csv(
    url_copper,
    index_col=0,
    decimal=".",
    thousands=",",
    parse_dates=['date'],
)

In [4]:
# Preview the first five rows to check that the copper CSV was parsed as expected.
df_copper.head(n=5)

Unnamed: 0,date,lme_copper_cash,lme_copper_three_month,lme_copper_stock
0,2022-04-13,10290.5,10306.0,107000
1,2022-04-12,10201.0,10226.0,104350
2,2022-04-11,10235.5,10254.0,105600
3,2022-04-08,10390.0,10380.0,103775
4,2022-04-07,10292.0,10301.0,101275


## Ökonomische Indikatoren für den Kupferpreis

In [5]:
# Load the economic indicators table (it carries a 'Jahr' year column);
# the first CSV column is used as the index.
df_economic = pd.read_csv(
    url_economic,
    index_col=0,
    encoding='utf-8',
)

In [6]:
# Preview the first five rows of the economic indicators.
df_economic.head(n=5)

Unnamed: 0,Jahr,CN_BIP pro Kopf in USD,US_BIP pro Kopf in USD,Global_Nachfrage in Mio. Tonnen,EU_Produktionsmenge in 1000 Tonnen,AU_Produktionsmenge in 1000 Tonnen,CL_Produktionsmenge in 1000 Tonnen,CN_Produktionsmenge in 1000 Tonnen,MX_Produktionsmenge in 1000 Tonnen,PE_Produktionsmenge in 1000 Tonnen,...,US_Verbrauch in 1000 Tonnen,AU_Reserven in Mio. Tonnen,CL_Reserven in Mio. Tonnen,CN_Reserven in Mio. Tonnen,US_Reserven in Mio. Tonnen,CG_Reserven in Mio. Tonnen,MX_Reserven in Mio. Tonnen,PE_Reserven in Mio. Tonnen,RU_Reserven in Mio. Tonnen,ZM_Reserven in Mio. Tonnen
32,2012,6282.71,51736.74,20.48,3882.0,958.0,5430.0,1630.0,440.0,1300.0,...,1832.0,86.0,190.0,30.0,39.0,20.0,38.0,76.0,30.0,20.0
33,2013,7039.57,53245.52,21.42,3950.0,990.0,5780.0,1600.0,480.0,1380.0,...,1828.0,87.0,190.0,30.0,39.0,20.0,38.0,70.0,30.0,20.0
34,2014,7645.88,55083.51,22.93,3960.0,970.0,5750.0,1760.0,515.0,1380.0,...,1827.0,93.0,209.0,30.0,35.0,20.0,38.0,68.0,30.0,20.0
35,2015,8034.29,56729.68,23.06,3704.0,971.0,5760.0,1710.0,594.0,1700.0,...,1829.0,88.0,210.0,30.0,33.0,20.0,46.0,82.0,30.0,20.0
36,2016,8063.45,57839.99,23.49,3634.0,948.0,5550.0,1900.0,752.0,2350.0,...,1869.0,89.0,210.0,28.0,35.0,20.0,46.0,81.0,30.0,20.0


## Joinen der Datensätze

In [7]:
# Check the column dtypes before deriving the join key; 'date' must be a
# datetime column for the year extraction in the next cell.
df_copper.dtypes

date                      datetime64[ns]
lme_copper_cash                  float64
lme_copper_three_month           float64
lme_copper_stock                   int64
dtype: object

In [8]:
# Derive the calendar year of every observation from the datetime column.
# 'year' is the key used later to join the copper rows onto the economic
# indicators. 'date' is already datetime64 (see the dtypes check above), so the
# .dt accessor is used directly instead of wrapping the column in a DatetimeIndex.
df_copper['year'] = df_copper['date'].dt.year

In [9]:
# Preview the first five rows again to confirm the derived 'year' column.
df_copper.head(n=5)

Unnamed: 0,date,lme_copper_cash,lme_copper_three_month,lme_copper_stock,year
0,2022-04-13,10290.5,10306.0,107000,2022
1,2022-04-12,10201.0,10226.0,104350,2022
2,2022-04-11,10235.5,10254.0,105600,2022
3,2022-04-08,10390.0,10380.0,103775,2022
4,2022-04-07,10292.0,10301.0,101275,2022


In [10]:
# Re-check the dtypes: the new 'year' column should show up as an integer column.
df_copper.dtypes

date                      datetime64[ns]
lme_copper_cash                  float64
lme_copper_three_month           float64
lme_copper_stock                   int64
year                               int64
dtype: object

In [11]:
# Column dtypes of the economic indicators; 'Jahr' is the year column that is
# matched against df_copper['year'] in the join below.
df_economic.dtypes

Jahr                                    int64
CN_BIP pro Kopf in USD                float64
US_BIP pro Kopf in USD                float64
Global_Nachfrage in Mio. Tonnen       float64
EU_Produktionsmenge in 1000 Tonnen    float64
AU_Produktionsmenge in 1000 Tonnen    float64
CL_Produktionsmenge in 1000 Tonnen    float64
CN_Produktionsmenge in 1000 Tonnen    float64
MX_Produktionsmenge in 1000 Tonnen    float64
PE_Produktionsmenge in 1000 Tonnen    float64
RU_Produktionsmenge in 1000 Tonnen    float64
ZM_Produktionsmenge in 1000 Tonnen    float64
US_Produktionsmenge in 1000 Tonnen    float64
CN_Verbrauch in 1000 Tonnen           float64
EU_Verbrauch in 1000 Tonnen           float64
JP_Verbrauch in 1000 Tonnen           float64
US_Verbrauch in 1000 Tonnen           float64
AU_Reserven in Mio. Tonnen            float64
CL_Reserven in Mio. Tonnen            float64
CN_Reserven in Mio. Tonnen            float64
US_Reserven in Mio. Tonnen            float64
CG_Reserven in Mio. Tonnen        

In [15]:
# Attach the economic indicators to the copper observations by year.
# Right join: every row of df_economic is kept (a year without any copper
# observations would appear once with missing copper columns), while copper
# rows whose year is absent from df_economic are dropped.
df_copper_economic = (
    df_copper
    .merge(df_economic, how='right', left_on='year', right_on='Jahr')
    # Keep only 'year' as the year column in the joined frame.
    .drop(columns='Jahr')
)

In [16]:
# Preview the first five rows of the joined data set.
df_copper_economic.head(n=5)

Unnamed: 0,date,lme_copper_cash,lme_copper_three_month,lme_copper_stock,year,CN_BIP pro Kopf in USD,US_BIP pro Kopf in USD,Global_Nachfrage in Mio. Tonnen,EU_Produktionsmenge in 1000 Tonnen,AU_Produktionsmenge in 1000 Tonnen,...,US_Verbrauch in 1000 Tonnen,AU_Reserven in Mio. Tonnen,CL_Reserven in Mio. Tonnen,CN_Reserven in Mio. Tonnen,US_Reserven in Mio. Tonnen,CG_Reserven in Mio. Tonnen,MX_Reserven in Mio. Tonnen,PE_Reserven in Mio. Tonnen,RU_Reserven in Mio. Tonnen,ZM_Reserven in Mio. Tonnen
0,2012-12-31,7915.0,7930.0,320050,2012,6282.71,51736.74,20.48,3882.0,958.0,...,1832.0,86.0,190.0,30.0,39.0,20.0,38.0,76.0,30.0,20.0
1,2012-12-28,7870.0,7875.0,318050,2012,6282.71,51736.74,20.48,3882.0,958.0,...,1832.0,86.0,190.0,30.0,39.0,20.0,38.0,76.0,30.0,20.0
2,2012-12-27,7871.5,7900.0,318000,2012,6282.71,51736.74,20.48,3882.0,958.0,...,1832.0,86.0,190.0,30.0,39.0,20.0,38.0,76.0,30.0,20.0
3,2012-12-24,7780.0,7801.5,317350,2012,6282.71,51736.74,20.48,3882.0,958.0,...,1832.0,86.0,190.0,30.0,39.0,20.0,38.0,76.0,30.0,20.0
4,2012-12-21,7768.5,7792.0,312400,2012,6282.71,51736.74,20.48,3882.0,958.0,...,1832.0,86.0,190.0,30.0,39.0,20.0,38.0,76.0,30.0,20.0
