# USDA Crop Data — Exploratory Data Analysis
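This notebook performs an initial exploratory analysis of the processed USDA dataset: it loads the processed CSV, drops rows with a missing `value`, and then looks at the distribution of `value`, its mean per year, and the ten states with the highest mean.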

## 1. Import Libraries

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Select matplotlib's "default" style sheet for every figure drawn after this point.
plt.style.use("default")

## 2. Load Dataset

In [None]:
# Location of the processed USDA CSV.
# Set the USDA_PROCESSED_CSV environment variable to read a copy stored elsewhere
# (another machine, CI, a colleague's checkout). When it is unset or empty the
# original location is used, so existing setups keep working unchanged.
DEFAULT_DATA_PATH = "/home/emanuel/usda-etl-pipeline/data/processed/usda_processed.csv"
DATA_PATH = os.environ.get("USDA_PROCESSED_CSV") or DEFAULT_DATA_PATH

df = pd.read_csv(DATA_PATH)
df.head()  # preview the first rows as the cell output

## 3. Inspect Data

In [None]:
# Print a concise summary of the frame: column names, non-null counts and dtypes.
df.info()

## 4. Clean Dataset

In [None]:
# Keep only the rows whose "value" is present, then show summary statistics
# for what remains.
has_value = df["value"].notna()
df = df[has_value]
df.describe()

## 5. Analysis — Value Distribution

In [None]:
# Histogram of "value" split into 40 bins, drawn on a 10x5-inch figure.
fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(df["value"], bins=40)
ax.set_title("Distribution of 'value'")
ax.set_xlabel("value")
ax.set_ylabel("Frequency")
plt.show()

## 6. Trend Analysis by Year

In [None]:
# Mean of "value" for each year, turned back into a two-column frame (year, value).
mean_by_year = df.groupby("year")["value"].mean()
yearly = mean_by_year.reset_index()

# Line chart of the yearly means.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(yearly["year"], yearly["value"])
ax.set_title("Mean Value per Year")
ax.set_xlabel("Year")
ax.set_ylabel("Mean Value")
plt.show()

## 7. Comparison Across States

In [None]:
# Rank states by their mean "value" (highest first) and keep the top ten.
mean_by_state = df.groupby("state_name")["value"].mean()
state_mean = mean_by_state.sort_values(ascending=False).head(10)

# Bar chart of those ten states.
fig, ax = plt.subplots(figsize=(10, 5))
state_mean.plot.bar(ax=ax)
ax.set_title("Top 10 States by Mean Value")
ax.set_ylabel("Mean Value")
plt.show()