### Brief Description

This notebook was used to download crop insurance data (Summary of Business files) from the USDA Risk Management Agency (RMA).


In [2]:
import pandas as pd
import requests
from io import StringIO, BytesIO
import zipfile
import os
from typing import Optional, List
import time
import warnings
# Suppress every warning for the rest of the session (applies globally, not
# just to this notebook's own code).
warnings.filterwarnings('ignore')


In [3]:
## Config

# Directory URL that holds the per-year Summary of Business archives
# (state/county/crop level). Note that it already ends with a trailing slash.
BASE_URL = "https://pubfs-rma.fpac.usda.gov/pub/Web_Data_Files/Summary_of_Business/state_county_crop/"

# Example of a full archive URL (archives are named sobcov_<year>.zip):
# https://pubfs-rma.fpac.usda.gov/pub/Web_Data_Files/Summary_of_Business/state_county_crop/sobcov_2024.zip

In [4]:
def download_rma_data(year: int) -> pd.DataFrame:
    """
    Download USDA RMA Summary of Business data for a specific year.

    Fetches ``sobcov_{year}.zip`` from ``BASE_URL``, writes the decoded text of
    the ``.txt`` member to ``data/raw/rma_{year}.txt`` (relative to the current
    working directory) and parses it as a pipe-delimited table.

    Args:
        year (int): Year to download data for

    Returns:
        pd.DataFrame: The parsed records. An empty DataFrame is returned when
        the download fails, the archive is not a valid ZIP, the archive holds
        no ``.txt`` member, or any other error occurs while processing; the
        error is printed rather than raised.
    """
    zip_filename = f"sobcov_{year}.zip"
    # BASE_URL may or may not carry a trailing slash; strip it so the joined
    # URL never contains a double slash before the file name.
    download_url = f"{BASE_URL.rstrip('/')}/{zip_filename}"

    print(f"Downloading RMA data for year {year}")

    try:
        print(f"Trying URL: {download_url}")
        response = requests.get(download_url, timeout=30)
        response.raise_for_status()

        # Process the ZIP file entirely in memory
        with zipfile.ZipFile(BytesIO(response.content)) as zip_ref:
            file_list = zip_ref.namelist()
            print(f"Files in ZIP: {file_list}")

            # Match the extension case-insensitively so ".TXT" members are
            # found too. If several members match, the last one is used.
            txt_files = [name for name in file_list if name.lower().endswith('.txt')]
            if not txt_files:
                # Stop here instead of trying to open a non-existent member,
                # which would only surface as a confusing generic error.
                print(f"No data file found in ZIP for year {year}")
                return pd.DataFrame()
            data_file = txt_files[-1]

            print(f"Processing file: {data_file}")

            # Extract the member; bytes that are not valid UTF-8 are dropped.
            with zip_ref.open(data_file) as file:
                content = file.read().decode('utf-8', errors='ignore')

        # Always keep a copy of the raw text next to the notebook
        os.makedirs('data/raw', exist_ok=True)
        with open(f'data/raw/rma_{year}.txt', 'w', encoding='utf-8') as f:
            f.write(content)

        # RMA files are typically pipe-delimited.
        # NOTE(review): read_csv treats the first line as the header row by
        # default -- confirm the RMA files actually ship with a header line.
        df = pd.read_csv(StringIO(content), delimiter='|', low_memory=False)

        print(f"Successfully downloaded {len(df)} records for year {year}")
        print(f"Columns: {list(df.columns)}")

        return df

    except requests.exceptions.RequestException as e:
        print(f"Failed to download from {download_url}: {e}")
    except zipfile.BadZipFile as e:
        print(f"Invalid ZIP file from {download_url}: {e}")
    except Exception as e:
        # Best-effort boundary: report anything unexpected and fall through to
        # the empty-DataFrame return so a multi-year loop can keep going.
        print(f"Error processing {download_url}: {e}")

    return pd.DataFrame()
