In [11]:
from database import get_database_prod
import pandas as pd
import yfinance as yf
from datetime import datetime

In [3]:
def get_market_year(row): #make a market year row in datas
    year = row['Date'].year
    month = row['Date'].month
    if month >= 6: #month starting market year
        market_year = f"{year}/{str(year + 1)[-2:]}"
    else:
        market_year = f"{year - 1}/{str(year)[-2:]}"
    return market_year

In [4]:
def get_continuous_wheat_prices():
    zw = pd.read_csv("files/ZWFutures.csv")
    zw = zw.rename(columns={"Date Time": "Date"})
    zw["Date"] = pd.to_datetime(zw["Date"])
    zw["MarketYear"] = zw.apply(get_market_year, axis=1)
    #continuous is simple : current contract is the one in front, roll is done when current contract expires
    zw_continuous = zw.groupby("Date").first().reset_index()
    zw_continuous["Year-Month"] = zw_continuous["Date"].dt.to_period("M")
    return zw_continuous

In [5]:
zw_continuous = get_continuous_wheat_prices()

y : CBOT Wheat price (monthly avergae)

In [27]:
zw_monthly_average = zw_continuous[zw_continuous["Year-Month"] != "2025-04"].resample("ME", on="Date").agg({
    "Date": "first",
    "Close": "mean",
    "MarketYear": "first",
    "Year-Month": "first"
})
zw_monthly_average.head()

Unnamed: 0_level_0,Date,Close,MarketYear,Year-Month
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1979-01-31,1979-01-08,341.625,1978/79,1979-01
1979-02-28,1979-02-01,345.092105,1978/79,1979-02
1979-03-31,1979-03-01,347.488636,1978/79,1979-03
1979-04-30,1979-04-02,350.6625,1978/79,1979-04
1979-05-31,1979-05-01,383.306818,1978/79,1979-05


US and World Stock Use Ratio

In [52]:
#get usda data from db
usda_collection = get_database_prod()["agri_data"]["wasde"]
usda_europe = usda_collection.find({
    "Region" : {"$in": ["World", "United States"]}
})
usda_df = pd.DataFrame(list(usda_europe))

#get old data from txt
old_usda_report = pd.read_csv("files/old_yearly_usda_wheat_report.txt") 

#perform transformation for manipulation
usda_df = usda_df.rename(columns={"ReleaseDate": "Date"})
usda_df["Date"] = pd.to_datetime(usda_df["Date"])
usda_df["Year-Month"] = usda_df["Date"].dt.to_period("M")
usda_df["ReleaseDateMY"] = usda_df.apply(get_market_year, axis=1)
usda_df = usda_df[usda_df["Commodity"] == "Wheat"]

#same for stock to use needed values
usda_wheat_stckuse = usda_df[(usda_df["Attribute"].isin(["Domestic Total", "Exports", "Ending Stocks"])) & (usda_df["ReportTitle"] == "World Wheat Supply and Use") & (usda_df["ProjEstFlag"].isna())][["Date", "Region", "MarketYear", "Attribute", "Value", "Year-Month", "ProjEstFlag"]].reset_index(drop=True)
usda_wheat_stckuse["Year-Month"] = (usda_wheat_stckuse["Date"] - pd.DateOffset(years=2)).dt.to_period("M")
usda_wheat_stckuse["Date"] = usda_wheat_stckuse["Date"] - pd.DateOffset(years=2)

#pivot data and filter on region
usda_wheat_stckuse = usda_wheat_stckuse.pivot_table(index=['Date', 'Region', 'MarketYear', 'Year-Month'], 
                           columns='Attribute', 
                           values='Value').reset_index()
us_wheat_stckuse = usda_wheat_stckuse[usda_wheat_stckuse["Region"] == "United States"]
world_wheat_stckuse = usda_wheat_stckuse[usda_wheat_stckuse["Region"] == "World"]

old_usda_report = old_usda_report.pivot_table(index=['Region', 'MarketYear', "Year"], 
                           columns='Attribute', 
                           values='Value').reset_index()
old_us_wheat_stckuse = old_usda_report[old_usda_report["Region"] == "United States"]
old_world_wheat_stckuse = old_usda_report[old_usda_report["Region"] == "World"]

#groupby market year to get only the last market year numbers 
us_last_wheat_stckuse = us_wheat_stckuse.groupby("MarketYear").last().reset_index()
world_last_wheat_stckuse = world_wheat_stckuse.groupby("MarketYear").last().reset_index()

#concat old and new dataset
us_last_wheat_stckuse = pd.concat([old_us_wheat_stckuse, us_last_wheat_stckuse])
world_last_wheat_stckuse = pd.concat([old_world_wheat_stckuse, world_last_wheat_stckuse])

#add date field when NaN
us_last_wheat_stckuse.loc[us_last_wheat_stckuse["Date"].isna(), "Date"] = pd.to_datetime(us_last_wheat_stckuse.loc[us_last_wheat_stckuse["Date"].isna(), "Year"].astype(str).str[:-2] + "-04-10", format="%Y-%m-%d")
us_last_wheat_stckuse = us_last_wheat_stckuse.reset_index(drop=True)

world_last_wheat_stckuse.loc[world_last_wheat_stckuse["Date"].isna(), "Date"] = pd.to_datetime(world_last_wheat_stckuse.loc[world_last_wheat_stckuse["Date"].isna(), "Year"].astype(str).str[:-2] + "-04-10", format="%Y-%m-%d")
world_last_wheat_stckuse = world_last_wheat_stckuse.reset_index(drop=True)

#perform stock to use calc
us_last_wheat_stckuse["StckUse"] = (us_last_wheat_stckuse["Ending Stocks"] / (us_last_wheat_stckuse["Domestic Total"] + us_last_wheat_stckuse["Exports"])) * 100
world_last_wheat_stckuse["StckUse"] = (world_last_wheat_stckuse["Ending Stocks"] / (world_last_wheat_stckuse["Domestic Total"] + world_last_wheat_stckuse["Exports"])) * 100

#merge dfs, rename field and keep wanted data
stck_to_use = pd.merge(us_last_wheat_stckuse, world_last_wheat_stckuse, on=["MarketYear", "Date"], how="inner")
stck_to_use = stck_to_use.rename(columns={
    "StckUse_x": "StckUse US",
    "StckUse_y": "StckUse World",
})
stck_to_use = stck_to_use[["Date", "MarketYear", "StckUse US", "StckUse World"]]
stck_to_use["Year-Month"] = stck_to_use["Date"].dt.to_period("M")

In [53]:
stck_to_use.head()

Attribute,Date,MarketYear,StckUse US,StckUse World,Year-Month
0,1981-04-10,1980/81,43.17817,13.799448,1981-04
1,1982-04-10,1981/82,44.335664,15.40404,1982-04
2,1983-04-10,1982/83,62.707224,16.8169,1983-04
3,1984-04-10,1983/84,55.084623,16.920859,1984-04
4,1985-04-10,1984/85,55.27366,20.530852,1985-04


WTI Crude oil prices (monthly average)

In [3]:
cl = pd.read_csv("files/CL-1983-Today.csv")[:-1]
cl["Date"] = pd.to_datetime(cl["Date"])
cl["Year-Month"] = cl["Date"].dt.to_period("M")
cl_monthly_mean = cl.resample("ME", on="Date").agg({
    "Date": "first",
    "Close": "mean",
    "Year-Month": "first"
})
cl_monthly_mean = cl_monthly_mean[1:-1]
cl_monthly_mean.head()

Unnamed: 0_level_0,Date,Close,Year-Month
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1983-04-30,1983-04-04,30.5005,1983-04
1983-05-31,1983-05-02,30.14381,1983-05
1983-06-30,1983-06-01,30.929545,1983-06
1983-07-31,1983-07-01,31.5675,1983-07
1983-08-31,1983-08-01,31.882174,1983-08


Urea prices (monthly average)

In [5]:
urea = pd.read_excel("files/world-bank-monthly-prices.xlsx", skiprows=4, sheet_name="Monthly Prices")
urea = urea[1:]
urea = urea.rename(columns={"Unnamed: 0": "Date"})
urea["Date"] = pd.to_datetime(urea["Date"], format="%YM%m")
urea = urea[["Date", "Urea "]]
urea["Year-Month"] = urea["Date"].dt.to_period("M")
urea.tail()

Unnamed: 0,Date,Urea,Year-Month
779,2024-11-01,352.3,2024-11
780,2024-12-01,352.0,2024-12
781,2025-01-01,380.5,2025-01
782,2025-02-01,436.5,2025-02
783,2025-03-01,394.5,2025-03


Dollar index (monthly average)

In [30]:
dollar = yf.download("DX-Y.NYB", start="1970-01-01", period="1mo").droplevel(1, axis=1)
monthly_dollar_mean = dollar.resample("ME").agg({
    'Close': 'mean',
}).reset_index()
monthly_dollar_mean["Year-Month"] = monthly_dollar_mean["Date"].dt.to_period("M")
monthly_dollar_mean = monthly_dollar_mean[:-1]
monthly_dollar_mean.tail()

[*********************100%***********************]  1 of 1 completed


Price,Date,Close,Year-Month
646,2024-11-30,105.8435,2024-11
647,2024-12-31,107.19,2024-12
648,2025-01-31,108.609047,2025-01
649,2025-02-28,107.353157,2025-02
650,2025-03-31,104.187143,2025-03


Managed money net positions (monthly average) and his categorical variable

In [71]:
cot_wheat_col = get_database_prod()["cot_data"]["us_commodity"]
cot_wheat_cursor = cot_wheat_col.find({
    "Market_and_Exchange_Names" : "WHEAT-SRW - CHICAGO BOARD OF TRADE"
    }).sort("Date", 1)
cot_wheat = pd.DataFrame(list(cot_wheat_cursor))[["Date", "M_Money_Positions_Long_All", "M_Money_Positions_Short_All"]]
cot_wheat["MM_Net"] = cot_wheat["M_Money_Positions_Long_All"] - cot_wheat["M_Money_Positions_Short_All"]
cot_wheat_monthly_average = cot_wheat.resample("ME", on="Date").agg({
    "MM_Net": "mean"
}).reset_index()
cot_wheat_monthly_average["Year-Month"] = cot_wheat_monthly_average["Date"].dt.to_period("M")
cot_wheat_monthly_average["MM_Categorical"] = 1

cot_categ = pd.DataFrame(index=pd.Index(pd.date_range(start='1/1/1970', end="1/1/2005", freq='ME'), name="Date"))
cot_categ["MM_Categorical"] = 0
cot_categ["MM_Net"] = 0
cot_categ = cot_categ.reset_index()
cot_categ["Year-Month"] = cot_categ["Date"].dt.to_period("M")

cot_wheat_monthly_average_categorical = pd.concat([cot_categ, cot_wheat_monthly_average])

cot_wheat_monthly_average_categorical.head()

Unnamed: 0,Date,MM_Categorical,MM_Net,Year-Month
0,1970-01-31,0,0.0,1970-01
1,1970-02-28,0,0.0,1970-02
2,1970-03-31,0,0.0,1970-03
3,1970-04-30,0,0.0,1970-04
4,1970-05-31,0,0.0,1970-05


Seasonality weighting

In [28]:
zw_monthly_average_season = zw_monthly_average[~zw_monthly_average["MarketYear"].isin(["1978/79", "2024/25"])]
zw_monthly_average_season["Month"] = zw_monthly_average_season["Date"].dt.month
zw_monthly_average_season = zw_monthly_average_season[["Month", "Close"]].groupby("Month").mean()

zw_overall_avg = zw_continuous['Close'].mean()

zw_seasonality_weight = zw_monthly_average_season / zw_overall_avg
zw_seasonality_weight.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  zw_monthly_average_season["Month"] = zw_monthly_average_season["Date"].dt.month


Unnamed: 0_level_0,Close
Month,Unnamed: 1_level_1
1,1.012663
2,1.008943
3,1.010058
4,0.996753
5,1.005457


Business Cycle

In [36]:
#Paper time interval : 1985 to 1998, 1999 to 2006 and 2007 to 2019. We add 2020 to today
interval_map = {
    0: ["1985-01-01", "1998-12-31"],
    1: ["1999-01-01", "2006-12-31"],
    2: ["2007-01-01", "2019-12-31"],
    3: ["2020-01-01", datetime.today().strftime("%Y-%m-%d")]
}
business_rank = dict()
cycle_weights = dict()
for k, v in interval_map.items():
    cycle_mean = zw_continuous[(zw_continuous["Date"] >= v[0]) & (zw_continuous["Date"] <= v[1])]["Close"].mean()
    business_rank[cycle_mean] = v

max_key = max(business_rank, key=business_rank.get)

for key, value in business_rank.items():
    weight = key / max_key
    cycle_weights[weight] = value

business_cycle_weights = pd.DataFrame(index=pd.Index(pd.date_range(start='1/1/1985', end=datetime.today().strftime("%d/%m/%Y"), freq='ME'), name="Date")).reset_index()
business_cycle_weights["Year-Month"] = business_cycle_weights["Date"].dt.to_period("M")

for kk, vv in cycle_weights.items():
    business_cycle_weights.loc[(business_cycle_weights["Date"] >= vv[0]) & 
                           (business_cycle_weights["Date"] <= vv[1]), 
                           "Business_Cycle_Weight"] = kk
    
business_cycle_weights.head()

Unnamed: 0,Date,Year-Month,Business_Cycle_Weight
0,1985-01-31,1985-01,0.526521
1,1985-02-28,1985-02,0.526521
2,1985-03-31,1985-03,0.526521
3,1985-04-30,1985-04,0.526521
4,1985-05-31,1985-05,0.526521


Political risks and uncertainties

In [51]:
#Event in the paper : Black Monday in oct 1987, US Savings and Loan Crisis between jan 1989 and  dec 1991, Dot-com Bubble between march 2000 and end 2001, Global Financial Crisis between oct 2007 and end 2008
#trade war between the USA and China between jan 2018 and feb 2020
#Event I will add : COVID 19 -> feb 2020 to june of 2021 ; Russian invasion -> feb 2022 deb 2023 ; Trump president and tariffs -> feb 2025 now

uncertainty_map = {
    0: ["1987-10-01", "1987-10-31"],
    1: ["1989-01-01", "1991-12-31"],
    2: ["2000-03-01", "2021-12-31"],
    3: ["2007-10-01", "2008-12-31"],
    4: ["2018-01-01", "2020-02-29"],
    5: ["2020-02-01", "2021-06-30"],
    6: ["2022-02-01", "2023-01-31"],
    7: ["2025-02-01", datetime.today().strftime("%Y-%m-%d")]
}

uncertainty_categ = pd.DataFrame(index=pd.Index(pd.date_range(start='1/1/1985', end=datetime.today().strftime("%d/%m/%Y"), freq='ME'), name="Date")).reset_index()
uncertainty_categ["Year-Month"] = uncertainty_categ["Date"].dt.to_period("M")
uncertainty_categ["Uncertain"] = 0

for _, v in uncertainty_map.items():
    uncertainty_categ.loc[(uncertainty_categ["Date"] >= v[0]) & 
                           (uncertainty_categ["Date"] <= v[1]), 
                           "Uncertain"] = 1
uncertainty_categ.head()

Unnamed: 0,Date,Year-Month,Uncertain
0,1985-01-31,1985-01,0
1,1985-02-28,1985-02,0
2,1985-03-31,1985-03,0
3,1985-04-30,1985-04,0
4,1985-05-31,1985-05,0
