In [201]:
# !pip install py7zr
# !pip install io
# !pip install schedule

In [213]:
import requests, pandas as pd, io, zipfile, os, py7zr, copy

In [203]:
## Download and extract .7z file from url
# URL to the .7z file
url = "https://unctadstat-api.unctad.org/bulkdownload/US.CommodityPrice_M/US_CommodityPrice_M"

# Define a path for saving the downloaded .7z file
file_path = os.path.join(os.getcwd(), 'downloaded_data.7z')

# Send a GET request to the URL
response = requests.get(url, stream=True)  # Stream=True to avoid loading the content into memory at once

# Check if the request was successful
if response.status_code == 200:
    # Write the content to the .7z file
    with open(file_path, 'wb') as f:
        for chunk in response.iter_content(chunk_size=8192): 
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)

    # Try to extract the .7z file
    with py7zr.SevenZipFile(file_path, mode='r') as z:
        z.extractall(path=os.getcwd())
    print("Extraction complete")
else:
    print(f"Download failed with status code: {response.status_code}")
    

## Retrieves CSV file and saves as DF
    
# List all files in the current directory
extracted_files = os.listdir(os.getcwd())

# You might want to filter for a specific file type, e.g., CSV files
csv_files = [file for file in extracted_files if file.endswith('.csv')]

# Assuming you want to work with the first CSV file
if csv_files:
    df = pd.read_csv(csv_files[0])
    print(df.head())
else:
    print("No CSV files found in the extracted contents.")


Extraction complete
    Period Period Label  Commodity  \
0  1995M01    Jan. 1995  IN0001.20   
1  1995M01    Jan. 1995  IN0001.01   
2  1995M01    Jan. 1995  IN0001.02   
3  1995M01    Jan. 1995  080300.01   
4  1995M01    Jan. 1995  020100.01   

                                     Commodity Label  Prices Prices Footnote  \
0                                         All groups     NaN             NaN   
1                                           All food     NaN             NaN   
2                                               Food     NaN             NaN   
3  Bananas, Central and South America, FOT, US im...    0.35             NaN   
4  Beef, Australia/New Zealand, frozen, CIF US po...    2.12             NaN   

  Prices Missing value  
0       Not applicable  
1       Not applicable  
2       Not applicable  
3                  NaN  
4                  NaN  


In [204]:
df.head()

Unnamed: 0,Period,Period Label,Commodity,Commodity Label,Prices,Prices Footnote,Prices Missing value
0,1995M01,Jan. 1995,IN0001.20,All groups,,,Not applicable
1,1995M01,Jan. 1995,IN0001.01,All food,,,Not applicable
2,1995M01,Jan. 1995,IN0001.02,Food,,,Not applicable
3,1995M01,Jan. 1995,080300.01,"Bananas, Central and South America, FOT, US im...",0.35,,
4,1995M01,Jan. 1995,020100.01,"Beef, Australia/New Zealand, frozen, CIF US po...",2.12,,


In [205]:
df.iloc[-1]

Period                                                            2023M12
Period Label                                                    Dec. 2023
Commodity                                                       271100.01
Commodity Label         Natural gas, index, Europe, United States and ...
Prices                                                              95.16
Prices Footnote                                                       NaN
Prices Missing value                                                  NaN
Name: 19015, dtype: object

In [206]:
# df = df[df["Period"]=="2023M08"]

# df

In [207]:
df[df["Prices Missing value"]=="Not applicable"]["Commodity Label"].unique()

array(['All groups', 'All food', 'Food', 'Tropical beverages',
       'Vegetable oilseeds and oils', 'Agricultural raw materials',
       'Minerals, ores and metals', 'Fuels'], dtype=object)

In [208]:
groups = df["Commodity Label"].unique()
print(len(groups))
[print("Group : " + i) for i in groups]

58
Group : All groups
Group : All food
Group : Food
Group : Bananas, Central and South America, FOT, US import price ($/kg)
Group : Beef, Australia/New Zealand, frozen, CIF US ports ($/kg)
Group : Maize, Argentina, Rosario, Up River, FOB ($/t)
Group : Maize, United States, n° 3 yellow, FOB Gulf ($/t)
Group : Rice, Thailand, white milled, 5% broken, export price, FOB ($/t)
Group : Salmon, fresh, fish-farm bred, export price, Norway ($/kg)
Group : Shrimps, brown, no. 1, shell-on, headless, Mexico ($/kg)
Group : Soybean meal, in bulk, 45/46% protein, Hamburg FOB ex-mill ($/t)
Group : Sugar, average of I.S.A. daily prices, FOB & Stowed, in bulk, Caribbean ports (¢/lb.)
Group : Wheat, Argentina, Trigo Pan, Up River, FOB ($/t)
Group : Wheat, United States, n° 2 Hard Red Winter (ordinary), FOB Gulf ($/t)
Group : Tropical beverages
Group : Cocoa beans, average daily prices New York/London (¢/lb.)
Group : Coffee, Brazilian and other natural Arabicas, ex-dock USA (¢/lb.)
Group : Coffee, Colombia

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [209]:
df = df[["Period", "Commodity Label", "Prices"]]
df

Unnamed: 0,Period,Commodity Label,Prices
0,1995M01,All groups,
1,1995M01,All food,
2,1995M01,Food,
3,1995M01,"Bananas, Central and South America, FOT, US im...",0.35
4,1995M01,"Beef, Australia/New Zealand, frozen, CIF US po...",2.12
...,...,...,...
19011,2023M12,"Zinc, Prime Western, delivered, North America ...",135.00
19012,2023M12,Fuels,
19013,2023M12,"Crude oil, Dubai, medium, Fateh API 32°, spot ...",77.22
19014,2023M12,"Crude oil, UK Brent, light blend API 38°, spot...",77.85


In [210]:
us_groups = [group for group in groups if "US" in group or "United States" in group or "North America" in group]
us_groups = [value for index, value in enumerate(us_groups) if index not in [0, 1, 4, 5, 6, 7, 10, 11, 13]]
us_groups

['Maize, United States, n° 3 yellow, FOB Gulf ($/t)',
 'Wheat, United States, n° 2 Hard Red Winter (ordinary), FOB Gulf ($/t)',
 'Soybeans, in bulk, United States, n° 2 yellow, CIF Rotterdam ($/t)',
 "Cattle hides, US Chicago packer's heavy native steers, FOB shipping point (¢/lb.)",
 'Natural gas, index, Europe, United States and Japan (2010=100)']

In [211]:
df = df[df["Commodity Label"].isin(us_groups)].reset_index(drop=True)
df = df.pivot_table(index='Period', columns='Commodity Label', values='Prices', aggfunc='first').reset_index()
df.columns.name = None
df.rename(columns={"Period": "Date", 
                    df.columns[1]: "Cattle Hide (Chicago)",
                    df.columns[2]: "Maize (US)",
                    df.columns[3]: "Natural Gas (US)",
                    df.columns[4]: "Soybeans (US)",
                    df.columns[5]: "Wheat (US)"
                   }, inplace=True)

In [214]:
commodity_prices = copy.deepcopy(df)
commodity_prices.head()

Unnamed: 0,Date,Cattle Hide (Chicago),Maize (US),Natural Gas (US),Soybeans (US),Wheat (US)
0,1995M01,90.12,109.1,33.25,243.0,159.8
1,1995M02,91.42,108.1,34.13,238.0,155.2
2,1995M03,97.95,110.3,33.64,246.0,149.8
3,1995M04,102.32,111.6,34.89,249.0,151.0
4,1995M05,99.61,113.6,35.18,250.0,162.7
