# In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

# Time fixed-effects analysis.
#
# Model: y = α_t + β*X + ε
# where α_t is the time fixed effect (one constant term per time period) and
# β is a single slope shared by every period.

# Relative path: resolved against the current working directory at run time.
DATA_PATH = 'homework_10.1.csv'

PATTERN_A = "Option A: Effects vary randomly."
PATTERN_B = "Option B: Effects increase from month 0 to about 5 or 6 but then eventually decrease again."
PATTERN_C = "Option C: Effects gradually increase from month 0 to 11."
PATTERN_D = "Option D: Effects decrease from month 0 to about 5 or 6 but then eventually increase again."
PATTERN_UNCLEAR = "Pattern unclear - need to examine more closely"


def build_time_fe_design(data):
    """Build the design matrix for the pooled time fixed-effects regression.

    Args:
        data: DataFrame with at least the columns ``'time'`` and ``'X'``.

    Returns:
        A ``(periods, design)`` tuple. ``periods`` is the sorted list of
        distinct ``'time'`` values. ``design`` is a float array with one
        indicator column per period (in ``periods`` order) followed by a final
        column holding ``X``.

    Raises:
        ValueError: If ``data`` contains no time periods.
    """
    periods = sorted(data['time'].unique())
    if not periods:
        raise ValueError("data contains no time periods")

    time_values = data['time'].to_numpy()
    indicator_columns = [(time_values == period).astype(float) for period in periods]
    x_column = data['X'].to_numpy(dtype=float)
    design = np.column_stack(indicator_columns + [x_column])
    return periods, design


def estimate_time_fixed_effects(data):
    """Estimate one intercept per time period with a slope on X common to all.

    A single regression is fitted on the indicator columns plus ``X``. The
    full set of period indicators is used, so the global intercept is switched
    off; each indicator coefficient is then directly the period's constant
    term α_t.

    Args:
        data: DataFrame with the columns ``'time'``, ``'X'`` and ``'y'``.

    Returns:
        Dict mapping each time period (ascending order) to its estimated
        fixed effect.

    Raises:
        ValueError: If ``data`` contains no time periods.
    """
    periods, design = build_time_fe_design(data)
    model = LinearRegression(fit_intercept=False)
    model.fit(design, data['y'].to_numpy(dtype=float))
    # The last coefficient is the shared slope β; the leading ones are α_t.
    return dict(zip(periods, model.coef_[:len(periods)]))


def classify_pattern(effects, mid_window=range(4, 8)):
    """Pick the answer option that matches the shape of the fixed effects.

    Args:
        effects: Sequence of fixed effects ordered by time period.
        mid_window: Positions (0-based) counted as "about 5 or 6" when looking
            for an interior peak or trough.

    Returns:
        One of the option strings, or the "Pattern unclear" message when an
        extremum lies in ``mid_window`` but no option's shape is satisfied.

    Raises:
        ValueError: If ``effects`` is empty.
    """
    values = list(effects)
    if not values:
        raise ValueError("effects must contain at least one value")

    peak = values.index(max(values))
    trough = values.index(min(values))
    first, last = values[0], values[-1]

    # Each check falls through when it fails, so a later option (for example
    # a monotone increase over a short panel) is still considered.
    if peak in mid_window and first < values[peak] > last:
        return PATTERN_B
    if trough in mid_window and first > values[trough] < last:
        return PATTERN_D
    if all(earlier <= later for earlier, later in zip(values, values[1:])):
        return PATTERN_C
    if peak in mid_window or trough in mid_window:
        return PATTERN_UNCLEAR
    return PATTERN_A


def plot_fixed_effects(times, effects):
    """Plot the fixed effects over time, with a quadratic trend when possible.

    Args:
        times: Time period labels, in plotting order.
        effects: Fixed effect for each entry of ``times``.
    """
    plt.figure(figsize=(10, 6))
    plt.plot(times, effects, 'bo-', linewidth=2, markersize=8)
    plt.xlabel('Time Period')
    plt.ylabel('Fixed Effect (Constant Term)')
    plt.title('Fixed Effects by Time Period')
    plt.grid(True, alpha=0.3)
    plt.xticks(times)

    # Add trend line to better see the pattern; a quadratic needs at least
    # three points to be determined.
    if len(times) >= 3:
        z = np.polyfit(times, effects, 2)  # quadratic fit
        p = np.poly1d(z)
        plt.plot(times, p(times), "r--", alpha=0.8, label='Quadratic trend')
        plt.legend()

    plt.tight_layout()
    plt.show()


# The statements stay at module level (not inside a function) so that, when
# run as a script or notebook cell, df / fixed_effects / times / effects remain
# available as globals; the guard only keeps them from running on import.
if __name__ == "__main__":
    # Load the data
    df = pd.read_csv(DATA_PATH)

    print("Data overview:")
    print(df.head())
    print(f"\nData shape: {df.shape}")
    print(f"\nColumns: {df.columns.tolist()}")
    print(f"\nTime periods: {sorted(df['time'].unique())}")
    print(f"\nCities: {sorted(df['city'].unique())}")

    # Calculate fixed effects for each time period
    fixed_effects = estimate_time_fixed_effects(df)
    for time_period, fixed_effect in fixed_effects.items():
        print(f"Time {time_period}: Fixed Effect = {fixed_effect:.4f}")

    # Convert to lists for plotting
    times = list(fixed_effects.keys())
    effects = list(fixed_effects.values())

    # Plot the fixed effects over time
    plot_fixed_effects(times, effects)

    # Analyze the pattern. Labels come from the data rather than being
    # hard-coded, so panels that are not exactly periods 0..11 still work.
    mid_point = len(times) // 2

    print("\nFixed Effects Analysis:")
    print(f"Time {times[0]}: {effects[0]:.4f}")
    print(
        f"Time {times[mid_point - 1]}-{times[mid_point]}: "
        f"{effects[mid_point - 1]:.4f}, {effects[mid_point]:.4f}"
    )
    print(f"Time {times[-1]}: {effects[-1]:.4f}")

    print("\nPattern Analysis:")
    print(f"Effects from {times[0]} to ~{times[mid_point]}: {effects[0]:.4f} → {effects[mid_point]:.4f}")
    print(f"Effects from ~{times[mid_point]} to {times[-1]}: {effects[mid_point]:.4f} → {effects[-1]:.4f}")

    # Find the turning point (max or min)
    max_idx = effects.index(max(effects))
    min_idx = effects.index(min(effects))

    print("\nTurning points:")
    print(f"Maximum at time {times[max_idx]}: {effects[max_idx]:.4f}")
    print(f"Minimum at time {times[min_idx]}: {effects[min_idx]:.4f}")

    # Determine the pattern
    pattern = classify_pattern(effects)

    print(f"\nConclusion: {pattern}")

# Output: FileNotFoundError: [Errno 2] No such file or directory: 'homework_10.1.csv'