# Neural Networks: Learn from Dataset

This notebook teaches a neural network to learn a linear function using a pre-generated CSV dataset.

We'll:
- Load training and test datasets from CSV files
- Build a simple model
- Train using the dataset
- Evaluate performance


In [None]:
# Dependencies: PyTorch (tensors, model, optimizer), matplotlib (plots),
# and pandas (CSV loading and summary statistics).
import torch
from torch import nn
import matplotlib.pyplot as plt
import pandas as pd
# Show which PyTorch version this notebook is running against.
print(torch.__version__)


## Load the dataset
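First, we download the training and test CSV files from the GitHub repository. If the download fails, we fall back to local copies, generating them with `generate_linear_dataset.py` if they don't exist yet.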


In [None]:
# Load the datasets from the GitHub repository, falling back to local CSV files.
import requests
import io
import os

# GitHub repository URLs for the CSV files
github_base_url = "https://raw.githubusercontent.com/gopinaath/ai-class/main/"
train_url = github_base_url + "linear_train.csv"
test_url = github_base_url + "linear_test.csv"

# requests.get() has no default timeout; without one a stalled connection
# would block this cell forever instead of triggering the local fallback.
REQUEST_TIMEOUT_SECONDS = 10
LOCAL_DATASET_FILES = ('linear_train.csv', 'linear_test.csv')
GENERATOR_SCRIPT = 'generate_linear_dataset.py'


def fetch_csv(url, timeout=REQUEST_TIMEOUT_SECONDS):
    """Download the CSV file at ``url`` and parse it into a DataFrame.

    Args:
        url: Address of the CSV file.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        pandas.DataFrame with the parsed file contents.

    Raises:
        requests.RequestException: on connection errors, timeouts, or an
            HTTP error status (via ``raise_for_status``).
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Raise an exception for bad status codes
    return pd.read_csv(io.StringIO(response.text))


print("Loading datasets from GitHub repository...")
print(f"Training data URL: {train_url}")
print(f"Test data URL: {test_url}")

try:
    train_df = fetch_csv(train_url)
    test_df = fetch_csv(test_url)

    print("✅ Datasets loaded successfully from GitHub!")

except Exception as e:
    # Deliberately broad: any download or parsing problem should lead to the
    # local fallback. The error is reported rather than silently swallowed.
    print(f"❌ Failed to load from GitHub: {e}")
    print("🔄 Falling back to local dataset generation...")

    # Fallback: generate datasets locally when EITHER file is missing, since
    # both are read below.
    if not all(os.path.exists(path) for path in LOCAL_DATASET_FILES):
        # The script is presumably expected to write both CSV files — verify.
        # The context manager closes the file handle; compile() attaches the
        # script name so tracebacks point at the right file.
        with open(GENERATOR_SCRIPT) as script:
            exec(compile(script.read(), GENERATOR_SCRIPT, 'exec'))

    train_df = pd.read_csv('linear_train.csv')
    test_df = pd.read_csv('linear_test.csv')
    print("✅ Local datasets loaded successfully!")

# Convert to PyTorch tensors. Targets get a trailing dimension via
# unsqueeze(1) so they line up with the model's (N, 1) output.
train_inputs = torch.tensor(train_df[['a', 'b']].values, dtype=torch.float32)
train_targets = torch.tensor(train_df['target'].values, dtype=torch.float32).unsqueeze(1)
test_inputs = torch.tensor(test_df[['a', 'b']].values, dtype=torch.float32)
test_targets = torch.tensor(test_df['target'].values, dtype=torch.float32).unsqueeze(1)

print(f"Training data shape: {train_inputs.shape}")
print(f"Test data shape: {test_inputs.shape}")
print("First few training examples:")
print(train_df.head())


## Exploratory Data Analysis
Let's explore our dataset to understand what we're working with.


In [None]:
# Summarise both splits: sizes, column roles, and descriptive statistics.
print("=== Dataset Overview ===")
n_train, n_test = len(train_df), len(test_df)
print(f"Training samples: {n_train}")
print(f"Test samples: {n_test}")

# Every column but the last is treated as a feature; the last is the target.
feature_names = train_df.columns[:-1].tolist()
print(f"Features: {feature_names}")
target_name = train_df.columns[-1]
print(f"Target: {target_name}")

# describe() output for each split, under its own heading.
for heading, frame in (
    ("\n=== Training Data Statistics ===", train_df),
    ("\n=== Test Data Statistics ===", test_df),
):
    print(heading)
    print(frame.describe())


In [None]:
# 2x2 overview figure: feature histograms, target histogram, and one
# feature-vs-target scatter plot per input feature.
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
(ax_inputs, ax_target), (ax_a, ax_b) = axes

# Top-left: overlaid histograms of both input features.
for feature, shade in (('a', 'blue'), ('b', 'red')):
    ax_inputs.hist(train_df[feature], bins=30, alpha=0.7, label=feature, color=shade)
ax_inputs.set_title('Distribution of Input Features')
ax_inputs.set_xlabel('Value')
ax_inputs.set_ylabel('Frequency')
ax_inputs.legend()

# Top-right: histogram of the target.
ax_target.hist(train_df['target'], bins=30, alpha=0.7, color='green')
ax_target.set_title('Distribution of Target Values')
ax_target.set_xlabel('Target Value')
ax_target.set_ylabel('Frequency')

# Bottom row: each feature against the target. Feature 'a' keeps the default
# marker colour, feature 'b' is drawn in red.
for ax, feature, extra_style in ((ax_a, 'a', {}), (ax_b, 'b', {'color': 'red'})):
    ax.scatter(train_df[feature], train_df['target'], alpha=0.6, s=10, **extra_style)
    ax.set_title(f'Feature {feature} vs Target')
    ax.set_xlabel(f'Feature {feature}')
    ax.set_ylabel('Target')

# Same light grid on every panel.
for ax in (ax_inputs, ax_target, ax_a, ax_b):
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()


In [None]:
# Pairwise correlations between all columns of the training frame.
print("=== Correlation Analysis ===")
correlation_matrix = train_df.corr()
print(correlation_matrix)

# Draw the matrix as a heatmap with the column names on both axes.
plt.figure(figsize=(8, 6))
plt.imshow(correlation_matrix, cmap='coolwarm', aspect='auto')
plt.colorbar()
column_labels = correlation_matrix.columns
tick_positions = range(len(column_labels))
plt.xticks(tick_positions, column_labels)
plt.yticks(tick_positions, column_labels)

# Write each coefficient into its cell (x = column index, y = row index).
for row_idx, row_values in enumerate(correlation_matrix.to_numpy()):
    for col_idx, coefficient in enumerate(row_values):
        plt.text(
            col_idx,
            row_idx,
            f'{coefficient:.3f}',
            ha='center',
            va='center',
            color='black',
            fontweight='bold',
        )

plt.title('Correlation Matrix')
plt.tight_layout()
plt.show()

# A slightly longer preview of the raw rows.
print("\n=== Sample Data ===")
print("First 10 training examples:")
print(train_df.head(10))


## Key Insights from EDA

**What we learned:**
- **Input range**: Both `a` and `b` are uniformly distributed between 0 and 1
- **Target range**: Target values range from ~0 to ~7 (since max is 3×1 + 4×1 = 7)
- **Linear relationship**: We can see clear linear patterns in the scatter plots
- **Correlations**: 
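  - `a` has strong positive correlation with target (≈0.87)
  - `b` has moderate positive correlation with target (≈0.50)
  - Both correlations are positive because target = 3a + 4b has positive coefficients (3 for `a`, 4 for `b`). Note that a correlation reflects how widely a feature varies as well as its coefficient, so the larger coefficient on `b` does not by itself mean a larger correlation — compare these values with the matrix printed above.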

**Why this is good for neural networks:**
- Clear linear relationship means a simple model should work well
- No missing values or outliers to worry about
- Good range of values for training


## Build the model
A simple linear layer can learn any linear function.


In [None]:
# Seed PyTorch's global RNG before building the model: nn.Linear draws its
# initial weight and bias at random, so without a seed every
# Restart & Run All would start training from a different point.
SEED = 42
torch.manual_seed(SEED)

# A single linear layer mapping the two inputs (a, b) to one output.
model = nn.Sequential(nn.Linear(2, 1))
model


## Train the model
We'll train using the entire dataset with mini-batches.


In [None]:
# Mini-batch SGD training loop.
# Produces `loss_history` (one MSE value per batch, in processing order) and
# updates `model` in place.
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
loss_history = []

batch_size = 32
num_epochs = 10

num_samples = len(train_inputs)
if num_samples == 0:
    raise ValueError("Training set is empty; nothing to train on.")

# Dedicated, seeded generator so the shuffling order is reproducible and does
# not depend on how much of the global RNG other cells have consumed.
shuffle_rng = torch.Generator().manual_seed(0)

model.train()  # no effect on a plain nn.Linear, but the conventional training-mode switch
for epoch in range(num_epochs):
    epoch_loss = 0.0
    samples_seen = 0

    # Visit the samples in a fresh random order each epoch, so batches are not
    # always made of the same rows in file order.
    permutation = torch.randperm(num_samples, generator=shuffle_rng)

    # Process data in mini-batches
    for start in range(0, num_samples, batch_size):
        batch_indices = permutation[start:start + batch_size]
        batch_inputs = train_inputs[batch_indices]
        batch_targets = train_targets[batch_indices]

        # Forward pass
        outputs = model(batch_inputs)
        loss = criterion(outputs, batch_targets)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        loss_history.append(batch_loss)

        # Weight each batch by its size so a short final batch does not skew
        # the epoch average.
        current_batch_size = len(batch_indices)
        epoch_loss += batch_loss * current_batch_size
        samples_seen += current_batch_size

    avg_loss = epoch_loss / samples_seen
    print(f"Epoch {epoch+1}/{num_epochs}, Average Loss: {avg_loss:.6f}")

print("Training complete!")


In [None]:
# Two side-by-side views of the per-batch training loss.
loss_fig, (ax_every_batch, ax_thinned) = plt.subplots(1, 2, figsize=(10, 4))

# Left panel: the loss recorded for every batch.
ax_every_batch.plot(loss_history)
ax_every_batch.set_title('Training Loss')
ax_every_batch.set_xlabel('Batch')
ax_every_batch.set_ylabel('MSE Loss')
ax_every_batch.grid(True)

# Right panel: only every 10th recorded value, which gives a less noisy curve.
thinned_losses = loss_history[::10]
ax_thinned.plot(thinned_losses)
ax_thinned.set_title('Training Loss (Every 10th Batch)')
ax_thinned.set_xlabel('Batch (x10)')
ax_thinned.set_ylabel('MSE Loss')
ax_thinned.grid(True)

plt.tight_layout()
plt.show()


## Evaluate the model
Test the model on the test dataset and some specific examples.


In [None]:
# Evaluate the trained model: test-set loss, a few hand-picked inputs, and the
# learned parameters compared with the true function target = 3a + 4b.
model.eval()  # no effect on a plain nn.Linear, but the conventional inference-mode switch

# Test on the test dataset
with torch.no_grad():
    test_outputs = model(test_inputs)
    test_loss = criterion(test_outputs, test_targets)

print(f"Test Loss: {test_loss.item():.6f}")

# Test on specific examples
test_cases = [(1.0, 1.0), (2.0, -1.0), (0.5, 0.5), (-1.0, 2.0)]

print("\nTesting on specific examples:")
print("Input (a, b) | Prediction | Expected | Error")
print("-" * 50)

# These predictions are inference only, so run them under no_grad() like the
# test-set evaluation above; otherwise autograd records a graph per call.
with torch.no_grad():
    for a, b in test_cases:
        pred = model(torch.tensor([a, b], dtype=torch.float32))
        expected = 3*a + 4*b  # The actual function
        error = abs(pred.item() - expected)
        print(f"({a:4.1f}, {b:4.1f})    | {pred.item():8.3f} | {expected:7.3f} | {error:.3f}")

# Show learned weights. model[0] is the single nn.Linear layer; its weight has
# shape (1, 2): one output row, one column per input feature.
print("\nLearned weights:")
print(f"Weight for 'a': {model[0].weight[0, 0].item():.3f}")
print(f"Weight for 'b': {model[0].weight[0, 1].item():.3f}")
print(f"Bias: {model[0].bias[0].item():.3f}")
print("\nExpected weights: a=3.0, b=4.0, bias=0.0")
