Empirical Application: Data Cleaning. This Jupyter notebook contains the Python code used to generate cleaned corn and soybean datasets.

The inputs are three Stata (.dta) datasets from Schlenker and Roberts (PNAS, 2009): yieldData.dta, weather_corn.dta, and weather_soybeans.dta. These files can be found in the dataSTATA folder of the replication archive: https://www.wolfram-schlenker.info/replicationFiles/SchlenkerRoberts2009.zip

The outputs are two CSV files, corn.csv and soybeans.csv, which are used subsequently in the data analysis.

The code was tested on Python v3.12.7.

In [2]:
# Create corn dataset
#
# Reads the yield and corn-weather Stata files, aggregates the weather
# variables per (fips, year), merges them with the corn yields, adds state and
# fips dummies plus state-specific linear/quadratic trend terms, and writes
# the result to corn.csv.
import pandas as pd
import numpy as np

yields = pd.read_stata("/Users/ababii/Library/CloudStorage/Dropbox/SchlenkerRoberst2009/dataSTATA/yieldData.dta")
weather = pd.read_stata("/Users/ababii/Library/CloudStorage/Dropbox/SchlenkerRoberst2009/dataSTATA/weather_corn.dta")

# Sample window; all four bounds are inclusive.
# These names and `yields` are read again by the soybean cell below.
monthMin = 3
monthMax = 8
yearMin = 1950
yearMax = 2020

# Treatment (aggregation) of weather data over seasons:
# keep only observations inside the month/year window ...
in_window = (
    (weather['month'] >= monthMin)
    & (weather['month'] <= monthMax)
    & (weather['year'] >= yearMin)
    & (weather['year'] <= yearMax)
)
weather = weather[in_window]

# ... then sum within each (fips, year) group and keep only the columns whose
# names start with "time", "dday" or "prec". After the groupby, fips and year
# are index levels rather than columns.
weather = weather.groupby(['fips', 'year']).sum()
weather = weather.filter(regex='^(time|dday|prec)')

# Treatment of yields: corn rows inside the year window.
# Every column of `yields` is carried through the merge; the unwanted ones are
# dropped just before the export.
yields_corn = yields[
    (yields['crop'] == 'corn')
    & (yields['year'] >= yearMin)
    & (yields['year'] <= yearMax)
]

# Merging of the datasets and treatment.
# `on` is matched against the index levels of `weather` and the columns of
# `yields_corn`; pd.merge defaults to an inner join.
merged_df = pd.merge(weather, yields_corn, on=['fips', 'year'])
merged_df['logYield'] = np.log(merged_df['yield'])
merged_df['state'] = (merged_df['fips'] // 1000).astype(int)  # leading digits of fips
merged_df['t'] = merged_df['year'] - yearMin                  # years since yearMin
merged_df['t2'] = merged_df['t'] ** 2
merged_df['prec2'] = merged_df['prec'] ** 2

# Creation of control variables: one dummy per state and one per fips code.
state_dummies = pd.get_dummies(merged_df['state'], prefix='state')
df = pd.concat([merged_df, state_dummies], axis=1)

fips_dummies = pd.get_dummies(df['fips'], prefix='fips')
df = pd.concat([df, fips_dummies], axis=1)

# State-specific trends: state dummy times t and times t2.
# Iterate over the distinct states only; looping over every row recomputes the
# same columns once per observation. unique() keeps order of first appearance,
# so the columns are created in the same order as a row-wise loop would.
for state_code in df['state'].unique():
    state_col = f'state_{state_code}'
    df[f'state_{state_code}_t'] = df[state_col] * df['t']
    df[f'state_{state_code}_t2'] = df[state_col] * df['t2']

# Drop the raw columns that are not exported; the plain t / t2 trends are
# removed because only their state-specific versions are kept.
df = df.drop(columns = ['crop', 'longitude', 'latitude', 'yield', 'areaHarv', 't', 't2'])
df.to_csv('/Users/ababii/Library/CloudStorage/Dropbox/SchlenkerRoberst2009/corn.csv', index=False)

In [3]:
# Create soybean dataset
#
# Same pipeline as the corn cell, applied to the soybean weather file and the
# soybean rows of `yields`; writes soybeans.csv.
# This cell relies on names defined by the corn cell above: `pd`, `np`,
# `yields`, `monthMin`, `monthMax`, `yearMin` and `yearMax`.
weather = pd.read_stata("/Users/ababii/Library/CloudStorage/Dropbox/SchlenkerRoberst2009/dataSTATA/weather_soybeans.dta")

# Treatment (aggregation) of weather data over seasons:
# keep only observations inside the (inclusive) month/year window ...
in_window = (
    (weather['month'] >= monthMin)
    & (weather['month'] <= monthMax)
    & (weather['year'] >= yearMin)
    & (weather['year'] <= yearMax)
)
weather = weather[in_window]

# ... then sum within each (fips, year) group and keep only the columns whose
# names start with "time", "dday" or "prec". After the groupby, fips and year
# are index levels rather than columns.
weather = weather.groupby(['fips', 'year']).sum()
weather = weather.filter(regex='^(time|dday|prec)')

# Treatment of yields: soybean rows inside the year window.
yields_soybeans = yields[
    (yields['crop'] == 'soybeans')
    & (yields['year'] >= yearMin)
    & (yields['year'] <= yearMax)
]

# Merging of the datasets and treatment.
# `on` is matched against the index levels of `weather` and the columns of
# `yields_soybeans`; pd.merge defaults to an inner join.
merged_df = pd.merge(weather, yields_soybeans, on=['fips', 'year'])
merged_df['logYield'] = np.log(merged_df['yield'])
merged_df['state'] = (merged_df['fips'] // 1000).astype(int)  # leading digits of fips
merged_df['t'] = merged_df['year'] - yearMin                  # years since yearMin
merged_df['t2'] = merged_df['t'] ** 2
merged_df['prec2'] = merged_df['prec'] ** 2

# Creation of control variables: one dummy per state and one per fips code.
state_dummies = pd.get_dummies(merged_df['state'], prefix='state')
df = pd.concat([merged_df, state_dummies], axis=1)

fips_dummies = pd.get_dummies(df['fips'], prefix='fips')
df = pd.concat([df, fips_dummies], axis=1)

# State-specific trends: state dummy times t and times t2.
# Iterate over the distinct states only; looping over every row recomputes the
# same columns once per observation. unique() keeps order of first appearance,
# so the columns are created in the same order as a row-wise loop would.
for state_code in df['state'].unique():
    state_col = f'state_{state_code}'
    df[f'state_{state_code}_t'] = df[state_col] * df['t']
    df[f'state_{state_code}_t2'] = df[state_col] * df['t2']

# Drop the raw columns that are not exported; the plain t / t2 trends are
# removed because only their state-specific versions are kept.
df = df.drop(columns = ['crop', 'longitude', 'latitude', 'yield', 'areaHarv', 't', 't2'])
df.to_csv('/Users/ababii/Library/CloudStorage/Dropbox/SchlenkerRoberst2009/soybeans.csv', index=False)