# Coding for Economists - Advanced Session 2

## 1. Setup Environment

In [None]:
%pip install tensorflow scikeras torch dash dash-cytoscape

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Enable pandas copy-on-write mode for the whole session
pd.set_option("mode.copy_on_write", True)

## 2. Use LSTM to Forecast US Inflation

### 2.1 Collect Data

In [17]:
# Fetch multiple FRED series
from pandas_datareader import data as pdr

# Series requested from FRED; they are renamed to CPI / Unemployment / FedFunds
# in the preprocessing step below.
fred_series = ['CPIAUCSL', 'UNRATE', 'FEDFUNDS']
start_date = '1990-01-01'

# No end date is passed, so the number of rows depends on when the cell is run.
df = pdr.DataReader(fred_series, 'fred', start=start_date)
print(df.shape)
df.head()

(424, 3)


Unnamed: 0_level_0,CPIAUCSL,UNRATE,FEDFUNDS
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1990-01-01,127.5,5.4,8.23
1990-02-01,128.0,5.3,8.24
1990-03-01,128.6,5.2,8.28
1990-04-01,128.9,5.4,8.26
1990-05-01,129.1,5.4,8.18


### 2.2 Preprocessing

#### Check Missing Values

In [18]:
# Count the missing observations in each downloaded series
missing_counts = df.isna().sum()
print("Missing values per column:\n", missing_counts)

Missing values per column:
 CPIAUCSL    1
UNRATE      1
FEDFUNDS    0
dtype: int64


#### Compute Year-over-Year Inflation

In [None]:
# Drop incomplete rows, shorten the column names, then add year-over-year
# inflation as the percentage change in CPI versus 12 rows earlier.
# The first 12 rows have no 12-row lag and are removed by the final dropna().
# NOTE(review): `df` is reassigned in place, so re-running this cell drops
# another 12 rows each time.
df = (
    df.dropna()
    .rename(columns={
        'CPIAUCSL': 'CPI', 'UNRATE': 'Unemployment', 'FEDFUNDS': 'FedFunds'
    })
    .assign(Inflation=lambda frame: frame['CPI'].pct_change(12) * 100)
    .dropna()
)

# Model inputs as a 2-D array; column 0 (Inflation) is the forecast target
feature_columns = ['Inflation', 'Unemployment', 'FedFunds']
features = df[feature_columns].to_numpy()
features[:20]

In [None]:
# Scale variables using Minmax
from sklearn.preprocessing import MinMaxScaler

# Fit the min-max scaler on the feature matrix, then rescale every column.
# NOTE(review): the scaler is fitted on the full sample, before the train/test
# split below, so test-period minima/maxima leak into the scaling.
scaler = MinMaxScaler()
scaler.fit(features)
scaled = scaler.transform(features)
scaled[:20]

#### Prepare Input Data for LSTM
Create a list of sliding windows. To predict the value at time `t`, the LSTM uses the previous `seq_len` observations, i.e. periods `t-seq_len` through `t-1`.

In [None]:
# Prepare input data for LSTM
def create_sequences(data, seq_len=12, target_col=0):
    """Turn a 2-D time series into sliding windows for supervised learning.

    Window ``i`` holds rows ``i .. i+seq_len-1`` of ``data`` (all columns); its
    target is the value of column ``target_col`` in row ``i+seq_len``.

    Parameters
    ----------
    data : array-like of shape (n_obs, n_features)
        Observations ordered in time, one row per period.
    seq_len : int, default 12
        Number of past rows in each window. Must be at least 1.
    target_col : int, default 0
        Column of ``data`` used as the prediction target.

    Returns
    -------
    X : np.ndarray of shape (n_obs - seq_len, seq_len, n_features)
    y : np.ndarray of shape (n_obs - seq_len,)
        When ``n_obs <= seq_len`` both arrays are empty but keep these ranks,
        so downstream code can still read ``X.shape[2]``.

    Raises
    ------
    ValueError
        If ``data`` is not 2-D or ``seq_len`` is smaller than 1.
    """
    data = np.asarray(data)
    if data.ndim != 2:
        raise ValueError("data must be 2-D: (n_obs, n_features)")
    if seq_len < 1:
        raise ValueError("seq_len must be at least 1")

    n_windows = max(len(data) - seq_len, 0)
    if n_windows == 0:
        # Keep a well-formed (0, seq_len, n_features) array instead of shape (0,)
        X = np.empty((0, seq_len, data.shape[1]), dtype=data.dtype)
    else:
        X = np.stack([data[i:i + seq_len] for i in range(n_windows)])

    # Target for window i is row i + seq_len, i.e. every row from seq_len onwards
    y = data[seq_len:, target_col].copy()
    return X, y

# Window length: the previous 12 monthly rows are used to predict the next one
SEQ_LEN = 12

X, y = create_sequences(scaled, seq_len=SEQ_LEN)
print(X.shape, y.shape, sep="\n")

### 2.3 Prepare Train/Test Sets

In [None]:
# Chronological 80/20 split: the earliest 80% of windows train the model,
# the most recent 20% are held out for testing (no shuffling).
split = int(len(X) * 0.8)
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

### 2.4 Implement LSTM and Tune the Parameters

#### Initiate the Model

In [None]:
# Model-building function for GridSearch
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Input
def build_model(units=50, optimizer='adam'):
    """Build and compile the LSTM regressor used by the grid search.

    The network takes windows of shape ``(SEQ_LEN, n_features)``, where
    ``n_features`` is read from the notebook-level array ``X``, and feeds them
    through one LSTM layer into a single-output dense layer.

    Parameters
    ----------
    units : int, default 50
        Size of the LSTM layer.
    optimizer : str or optimizer instance, default 'adam'
        Passed straight to ``compile``.

    Returns
    -------
    The compiled model, trained on mean squared error.
    """
    n_features = X.shape[2]

    network = Sequential()
    network.add(Input(shape=(SEQ_LEN, n_features)))
    network.add(LSTM(units))
    network.add(Dense(1))

    network.compile(optimizer=optimizer, loss='mse')
    return network

In [None]:
# Wrap with KerasRegressor
from scikeras.wrappers import KerasRegressor
# scikit-learn compatible wrapper around build_model so it can be used in GridSearchCV
regressor = KerasRegressor(
    model=build_model,
    verbose=0,
    units=50,
    run_eagerly=True,
)

__Grid Search Space__:
- `optimizer`: Algorithm for updating weights
- `batch_size`: How many samples per gradient update
- `epochs`: Number of training passes through the data

In [None]:
# Hyperparameter grid: every combination below (2 x 2 x 2 = 8) is evaluated
param_grid = dict(
    optimizer=['adam', 'rmsprop'],
    batch_size=[8, 16],
    epochs=[30, 40],
)

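__Split Time Series__: `TimeSeriesSplit` always trains on the past and tests on the block that immediately follows, and the training window grows with each split. With `n_splits=5` and, for example, 100 observations, every test block has `100 // (5 + 1) = 16` observations:
- Split 1: train 1–20   | test 21–36
- Split 2: train 1–36   | test 37–52
- Split 3: train 1–52   | test 53–68
- Split 4: train 1–68   | test 69–84
- Split 5: train 1–84   | test 85–100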

In [None]:
# Time series cross-validation
from sklearn.model_selection import TimeSeriesSplit
# Number of expanding-window folds used during the grid search
N_CV_SPLITS = 5
tscv = TimeSeriesSplit(n_splits=N_CV_SPLITS)

#### Grid Search

In [None]:
# Grid search
from sklearn.model_selection import GridSearchCV
# Exhaustive search over param_grid, scored by (negated) MSE on each
# time-series fold; n_jobs=-1 asks for all available cores.
grid = GridSearchCV(
    regressor,
    param_grid,
    scoring='neg_mean_squared_error',
    cv=tscv,
    n_jobs=-1,
)
grid_result = grid.fit(X_train, y_train)

# best_score_ is the negated MSE, so flip the sign before reporting it
print(f"Best MSE: {-grid_result.best_score_:.4f}")
print("Best Params:", grid_result.best_params_)

### 2.5 Predict the Historical Inflation

In [None]:
# Pull the fitted Keras network out of the winning grid-search estimator
best_regressor = grid_result.best_estimator_
best_keras_model = best_regressor.model_

# Predict for every window, i.e. both the training and the test period
hist_preds_scaled = best_keras_model.predict(X)

In [None]:
# The scaler was fitted on three columns, so pad the predictions with two
# zero columns before inverting, then keep only column 0 (inflation, in %).
n_hist = len(hist_preds_scaled)
padded_hist = np.zeros((n_hist, 3))
padded_hist[:, 0] = np.ravel(hist_preds_scaled)
hist_preds = scaler.inverse_transform(padded_hist)[:, 0]

### 2.6 Forecast the Future 12 Months

In [None]:
# Recursive forecast: predict one month, append it to the window, repeat.
n_future = 12
last_seq = scaled[-SEQ_LEN:].copy()   # most recent SEQ_LEN scaled rows
future_preds_scaled = []

for _ in range(n_future):
    window = last_seq.reshape(1, SEQ_LEN, X.shape[2])
    next_inflation = best_keras_model.predict(window)[0, 0]

    # Unemployment and the fed funds rate are held at their last values
    next_row = [next_inflation, last_seq[-1, 1], last_seq[-1, 2]]
    future_preds_scaled.append(next_row)

    # Slide the window forward by one step
    last_seq = np.vstack([last_seq[1:], next_row])

In [None]:
# Extract the inflation forecasts and undo the min-max scaling. The scaler
# expects three columns, so pad with zeros and keep only column 0.
future_preds = np.array(future_preds_scaled)[:, 0].reshape(-1, 1)
inflation_preds = scaler.inverse_transform(
    np.hstack([future_preds, np.zeros((n_future, 2))])
)[:, 0]

# The first forecast belongs to the month AFTER the last observation.
# Starting the range at df.index[-1] with a month-end frequency dated the first
# forecast inside the last observed month, shifting every forecast one month
# early. Month-start ('MS') labels also match the month-start dates of the data.
first_forecast_month = df.index[-1] + pd.DateOffset(months=1)
future_dates = pd.date_range(first_forecast_month, periods=n_future, freq='MS')
forecast_series = pd.Series(inflation_preds, index=future_dates)

### 2.7 Plot the Results

In [None]:
# The first SEQ_LEN rows have no prediction (they only feed the first window)
hist_dates = df.index[SEQ_LEN:]
historical = df['Inflation'].copy()

sns.set(style="whitegrid")
fig, ax = plt.subplots(figsize=(10, 6))

# Observed year-over-year inflation
sns.lineplot(
    x=historical.index, y=historical.values, label='Actual Inflation', ax=ax
)

# Model predictions over the historical sample (training and test windows)
sns.lineplot(x=hist_dates, y=hist_preds, label='Model Fit', ax=ax)

# 12-month out-of-sample forecast
sns.lineplot(
    x=forecast_series.index,
    y=forecast_series.values,
    label='12-Month Forecast',
    linestyle='--',
    ax=ax,
)

ax.set_title("US YoY Inflation + 12-Month Forecast")
ax.set_xlabel("Date")
ax.set_ylabel("Inflation Rate (%)")
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# Display the 12 forecast values (year-over-year inflation, in percent) with their dates
forecast_series

## 3. Use GNN to Build Network of US Industries

### 3.1 Load Data

In [None]:
# Input-output "Use" table: read everything as text so that the '---'
# placeholders become NaN, then convert the whole table to floats.
io = (
    pd.read_csv("BEA_IO_Accounts.csv", index_col=0, dtype=str, na_values=['---'])
    .astype(float)
)

# Annual industry growth data (Growth23 / Growth24 columns are used below)
df_growth = pd.read_csv("BEA_Industry_Growth.csv", index_col=0)

# Restrict both tables to the industries present in each, in the same order;
# the I/O table is cut to a square matrix over those industries.
common_index = io.index.intersection(df_growth.index)
io = io.loc[common_index, common_index]
df_growth = df_growth.loc[common_index]

io.head()

In [None]:
# Preview the growth table after it has been restricted to the common industries
df_growth.head()

### 3.2 Preprocessing

#### Prepare Edges and Weights

In [None]:
# Build the edge list and weights: one directed edge (row i -> column j) for
# every strictly positive cell of the I/O table, scanned row by row.
# NaN cells fail the `> 0` test and are therefore skipped.
flows = io.to_numpy()
n_suppliers, n_customers = flows.shape

positive_cells = [
    (i, j, flows[i, j])
    for i in range(n_suppliers)
    for j in range(n_customers)
    if flows[i, j] > 0
]
edges = [(i, j) for i, j, _ in positive_cells]
weights = [value for _, _, value in positive_cells]

#### Put Data in a Tensor Object

In [None]:
import torch
from torch_geometric.data import Data
# Graph connectivity as a [2, E] index tensor and one weight per edge ([E, 1])
edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()
edge_attr = torch.tensor(weights, dtype=torch.float).reshape(-1, 1)

# Node feature: each industry's Growth23 value, as a single-column matrix
growth_prev = df_growth['Growth23'].to_numpy().reshape(-1, 1)
x = torch.tensor(growth_prev, dtype=torch.float)

# Node target: each industry's Growth24 value
# NOTE(review): this reuses the name `y`, replacing the LSTM target array
# created in section 2.
growth_curr = df_growth['Growth24'].to_numpy()
y = torch.tensor(growth_curr, dtype=torch.float)

# Bundle everything into a single graph object
data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)

### 3.3 Build the Network Using GNN

#### Initiate the GNN Model

In [None]:
# Define a two–layer GCN
from torch_geometric.nn import GCNConv
import torch.nn.functional as F

class IO_GCN(torch.nn.Module):
    """Two-layer graph convolutional network that outputs one value per node.

    Parameters
    ----------
    in_feats : int
        Number of input features per node.
    hidden : int, default 16
        Width of the hidden layer between the two graph convolutions.
    """

    def __init__(self, in_feats, hidden=16):
        super().__init__()
        self.conv1 = GCNConv(in_feats, hidden)
        self.conv2 = GCNConv(hidden, 1)

    def forward(self, data):
        """Return a tensor of shape ``[num_nodes]`` with one prediction per node.

        ``data`` must provide ``x``, ``edge_index`` and ``edge_attr``.
        """
        # GCNConv documents its edge weights as a 1-D tensor of length E, while
        # the graph stores them as [E, 1]; drop the trailing axis before use.
        edge_weight = data.edge_attr
        if edge_weight is not None and edge_weight.dim() > 1:
            edge_weight = edge_weight.squeeze(-1)

        h = F.relu(self.conv1(data.x, data.edge_index, edge_weight))
        h = self.conv2(h, data.edge_index, edge_weight)

        # Squeeze only the feature axis so a one-node graph still yields shape [1]
        return h.squeeze(-1)

# Seed torch's global RNG so the weight initialisation (and therefore the
# training results) is the same on every Restart & Run All.
torch.manual_seed(42)

model = IO_GCN(in_feats=1)                                  # one input feature per node
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = torch.nn.MSELoss()

#### Get the Train/Test Indices

In [None]:
# Random 80/20 split of the nodes into training and test sets.
# A dedicated, seeded generator makes the split reproducible across runs
# without touching torch's global random state.
num_nodes = data.num_nodes
n_train = int(0.8 * num_nodes)

split_rng = torch.Generator().manual_seed(42)
perm = torch.randperm(num_nodes, generator=split_rng)

train_idx = perm[:n_train]
test_idx = perm[n_train:]

#### Train the Model

In [None]:
# Full-batch training: every epoch runs the whole graph through the model,
# but the loss (and so the gradient) uses the training nodes only.
N_EPOCHS = 1000
LOG_EVERY = 50

model.train()
for epoch in range(N_EPOCHS):
    optimizer.zero_grad()

    out = model(data)
    loss = loss_fn(out[train_idx], data.y[train_idx])

    loss.backward()
    optimizer.step()

    if not epoch % LOG_EVERY:
        print(f"Epoch {epoch:03d}  Loss: {loss.item():.4f}")

#### Evaluate the Trained Model

In [None]:
# Score the trained model on the held-out nodes; no gradients are needed here
model.eval()
with torch.no_grad():
    preds = model(data)

mse = loss_fn(preds[test_idx], data.y[test_idx])
print(f"\nTest MSE: {mse:.4f}")

In [None]:
# Compare the predicted growth and actual growth
import matplotlib.pyplot as plt
# Predicted vs. actual growth for the test nodes; points on the dashed
# 45-degree line are perfect predictions.
fig, ax = plt.subplots()
ax.scatter(data.y[test_idx], preds[test_idx])

growth_min, growth_max = data.y.min(), data.y.max()
ax.plot([growth_min, growth_max], [growth_min, growth_max], '--', lw=2)

ax.set_xlabel("Actual Growth (%)")
ax.set_ylabel("Predicted Growth (%)")
ax.set_title("GCN Predictions of Industry Growth")
plt.show()

#### Plot the Network

In [None]:
from torch_geometric.utils import to_networkx

# Convert to an undirected NetworkX graph (structure only, no attributes yet)
G = to_networkx(
    data,
    node_attrs=[],
    edge_attrs=[],
    to_undirected=True
)

# Attach weights. In an undirected graph the flows u -> v and v -> u share one
# edge, so writing them one after the other would silently keep only whichever
# direction came last. Accumulate them instead: each edge weight is the total
# flow between the two industries in both directions.
directed_pairs = data.edge_index.t().tolist()
directed_weights = data.edge_attr.reshape(-1).tolist()   # stays a list even when E == 1

bilateral_flow = {}
for (u, v), w in zip(directed_pairs, directed_weights):
    pair = (min(u, v), max(u, v))
    bilateral_flow[pair] = bilateral_flow.get(pair, 0.0) + float(w)

# add_edge updates the attribute of an existing edge and creates the edge if
# the conversion above left it out
for (u, v), total in bilateral_flow.items():
    G.add_edge(u, v, weight=total)

In [None]:
# Keep only edges between two different industries (no self-loops)
all_edges = [edge for edge in G.edges(data=True) if edge[0] != edge[1]]
all_weights = [attrs['weight'] for *_, attrs in all_edges]

# Weight at the 98th percentile; edges at or above it are the top 2%
threshold = np.percentile(all_weights, 98)

important_edges = list(
    filter(lambda edge: edge[2]['weight'] >= threshold, all_edges)
)

In [None]:
# Every node that is an endpoint of at least one important edge
important_nodes = {
    node
    for u, v, _ in important_edges
    for node in (u, v)
}

# Independent copy of the graph restricted to those nodes
G_sub = G.subgraph(important_nodes).copy()

In [None]:
# Cytoscape "elements": node entries first, then edge entries.
# Node ids are the integer positions as strings; labels are the industry names.
node_elements = [
    {'data': {'id': str(node), 'label': common_index[node]}}
    for node in G_sub.nodes()
]

# Only the important (top 2%) edges are drawn
edge_elements = [
    {'data': {'source': str(u), 'target': str(v), 'weight': d['weight']}}
    for u, v, d in important_edges
]

elements = node_elements + edge_elements
elements[:40]

In [None]:
# Create the Dash app
import dash
from dash import html
import dash_cytoscape as cyto

# The stylesheet previously held two rules for the same 'node' selector, the
# second overriding most of the first (font size, size, colour). They are
# merged into one rule with the values that actually took effect.
node_style = {
    'selector': 'node',
    'style': {
        'label': 'data(label)',
        'text-wrap': 'wrap',            # wrap long industry names ...
        'text-max-width': 80,           # ... at this width
        'font-size': '10px',
        'text-valign': 'center',
        'text-halign': 'center',
        'background-color': '#67a9cf',
        'width': 20,
        'height': 20
    }
}

edge_style = {
    'selector': 'edge',
    'style': {
        # map weight (0 -> largest non-self-loop weight) to line width 1 -> 8
        'width': f"mapData(weight, 0, {max(all_weights)}, 1, 8)",
        'line-color': '#888',
        'curve-style': 'bezier'
    }
}

app = dash.Dash(__name__)
app.layout = html.Div([
    html.H3("Industry I/O Network (Top 2% Flows, No Self-Loops)"),
    cyto.Cytoscape(
        id='cytoscape-network',
        elements=elements,
        layout={'name': 'cose'},
        style={'width': '100%', 'height': '700px'},
        stylesheet=[node_style, edge_style]
    )
])

if __name__ == '__main__':
    app.run(debug=False)