In [1]:
import os
from urllib.parse import quote, quote_plus

import pandas as pd
import pymongo
import requests
from pymongo.mongo_client import MongoClient

In [2]:
class USDA_API():
    """Small client for the USDA NASS Quick Stats REST API.

    Filters are accumulated in ``self.params`` as a query-string fragment of
    the form ``'&field=value&field2=value2'`` and appended to request URLs.

    Attributes:
        url: Base URL of the Quick Stats API.
        key: API key sent as the ``key`` query parameter.
        params: Current filter fragment ('' when no filter is set).
        format: Response format appended by ``return_call``.
        commodity_list: Field names accepted by ``get_param_values``.
    """

    def __init__(self, key):
        """Store the API key and start with an empty filter set."""
        self.url = 'https://quickstats.nass.usda.gov/api'
        self.key = key
        self.params = ''
        # return_call() reads this; call() parses the response as JSON.
        self.format = 'JSON'
        self.commodity_list = [
            'congr_district_code', 'state_name', 'week_ending', 'county_name',
            'prodn_practice_desc', 'county_ansi', 'statisticcat_desc', 'CV (%)',
            'Value', 'country_name', 'begin_code', 'end_code', 'state_alpha',
            'year', 'domaincat_desc', 'watershed_code', 'watershed_desc',
            'sector_desc', 'country_code', 'county_code', 'region_desc',
            'util_practice_desc', 'asd_desc', 'location_desc', 'asd_code',
            'domain_desc', 'freq_desc', 'state_fips_code', 'group_desc',
            'source_desc', 'state_ansi', 'class_desc', 'short_desc', 'unit_desc',
            'load_time', 'zip_5', 'reference_period_desc', 'agg_level_desc',
            'commodity_desc',
        ]

    def add_params(self, fieldname, value):
        """Append a ``fieldname=value`` filter to the query.

        The value is percent-encoded so that characters such as ``&``, ``=``
        or spaces inside it cannot break the query string apart.
        """
        encoded_value = quote(str(value), safe='')
        self.params += f'&{fieldname}={encoded_value}'

    def return_params(self):
        """Return the current filter fragment (e.g. ``'&year=2020'``)."""
        return self.params

    def return_call(self):
        """Return the full ``api_GET`` URL for the current filters."""
        return f'{self.url}/api_GET/?key={self.key}{self.params}&format={self.format}'

    def remove_params(self, fieldname):
        """Remove every filter whose field name is exactly ``fieldname``.

        Prints a notice and leaves ``self.params`` untouched when no filter
        is set at all.
        """
        pairs = [pair for pair in self.params.split('&') if pair]
        if not pairs:
            print('No Parameters to remove')
            return
        # Compare the field name only: a substring test would also drop
        # e.g. 'year__GE' or any filter whose value contains the name.
        kept = [pair for pair in pairs if pair.split('=', 1)[0] != fieldname]
        # Rebuild with a leading '&' per pair; an empty result is '' (not '&').
        self.params = ''.join(f'&{pair}' for pair in kept)

    def call(self):
        """Run the query and return the parsed JSON response.

        The row count is requested first so that an oversized query is
        rejected before any data is downloaded.

        Returns:
            The decoded JSON body on success, otherwise a message string
            starting with ``'Unable to Process Request.'``.
        """
        query = f'key={self.key}{self.params}'
        try:
            count_response = requests.get(f'{self.url}/get_counts/?{query}')
            count_response.raise_for_status()
            if int(count_response.json()['count']) >= 50000:
                return 'Unable to Process Request. Request is greater than 50000 rows'

            response = requests.get(f'{self.url}/api_GET/?{query}')
            response.raise_for_status()
            return response.json()
        except requests.exceptions.HTTPError as errh:
            status = errh.response.status_code
            if status == 415:
                return 'Unable to Process Request. Invalid data format'
            if status == 401:
                return 'Unable to Process Request. Unauthorized User.'
            # Report any other HTTP failure instead of returning None silently.
            return f'Unable to Process Request. HTTP status {status}'

    def get_param_values(self, field):
        """Return the values the API accepts for ``field``.

        Returns ``'Invalid Field!'`` when ``field`` is not in
        ``self.commodity_list``.
        """
        if field not in self.commodity_list:
            return 'Invalid Field!'
        response = requests.get(f'{self.url}/get_param_values/?key={self.key}&param={field}')
        return response.json()[field]


In [3]:
# Shared Quick Stats client used by the cells below; the API key is read from
# the `usda_key` environment variable (KeyError if it is not set).
data = USDA_API(os.environ['usda_key'])

In [4]:
def create_mongo_year_list(min_year=2000):
    """Return the API's available years that are >= ``min_year``.

    Args:
        min_year: Earliest year to keep (inclusive). Defaults to 2000.

    Returns:
        The matching year values, in the order and type returned by
        ``data.get_param_values('year')``.
    """
    return [year for year in data.get_param_values('year') if int(year) >= min_year]

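MongoDB creates databases lazily: a database only comes into existence once it contains a collection holding at least one document. We therefore name a database and a collection, then insert one placeholder document.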

In [5]:
# Placeholder document: every Quick Stats field name mapped to None.
default_schema = dict.fromkeys([
    'domaincat_desc', 'year', 'watershed_code', 'watershed_desc', 'country_code',
    'county_code', 'sector_desc', 'region_desc', 'begin_code', 'end_code', 'state_alpha',
    'statisticcat_desc', 'county_ansi', 'Value', 'CV (%)', 'country_name', 'congr_district_code',
    'state_name', 'week_ending', 'county_name', 'prodn_practice_desc', 'reference_period_desc',
    'zip_5', 'load_time', 'agg_level_desc', 'commodity_desc', 'unit_desc', 'source_desc',
    'class_desc', 'state_ansi', 'short_desc', 'asd_code', 'location_desc', 'util_practice_desc',
    'asd_desc', 'state_fips_code', 'freq_desc', 'domain_desc', 'group_desc',
])

In [6]:
class MongoDB():
    """Thin wrapper around a pymongo client bound to one database/collection.

    Attributes:
        username, password: Credentials used in the connection URI.
        cluster_name: Sent as the ``appName`` option of the connection URI.
        db_name, col_name: Database and collection every method operates on.
        schema: Placeholder document inserted by ``initialize``.
        host: Cluster host name used in the ``mongodb+srv://`` URI.
        client: The underlying ``pymongo.MongoClient``.
    """

    def __init__(self, username, password, cluster_name, db_name='', col_name='', schema=None,
                 host='usdacluster.s1juy.mongodb.net'):
        """Build the connection URI and create the client.

        Args:
            username: Database user name.
            password: Database user password.
            cluster_name: Value for the URI's ``appName`` option.
            db_name: Target database name.
            col_name: Target collection name.
            schema: Placeholder document for ``initialize``; defaults to an
                empty dict created per instance.
            host: Cluster host; defaults to the previously hard-coded host.
        """
        self.username = username
        self.password = password
        self.cluster_name = cluster_name
        self.db_name = db_name
        self.col_name = col_name
        # A fresh dict per instance: a `{}` default would be shared by all instances.
        self.schema = {} if schema is None else schema
        self.host = host
        # Credentials must be percent-escaped before being placed in the URI.
        user = quote_plus(str(self.username))
        secret = quote_plus(str(self.password))
        self.client = pymongo.MongoClient(
            f"mongodb+srv://{user}:{secret}@{self.host}/"
            f"?retryWrites=true&w=majority&appName={self.cluster_name}"
        )

    def _collection(self):
        """Return the collection object selected by ``db_name``/``col_name``."""
        return self.client[self.db_name][self.col_name]

    def test_connectivity(self):
        """Ping the deployment and print either a success message or the error."""
        try:
            self.client.admin.command('ping')
            print("Pinged your deployment. You successfully connected to MongoDB!")
        except Exception as e:
            print(e)

    def initialize(self):
        """Insert the placeholder schema document into the collection.

        A copy is inserted because ``insert_one`` adds an ``_id`` key to the
        dict it is given; inserting ``self.schema`` itself would mutate the
        caller's dict and make a second ``initialize()`` fail on a duplicate
        ``_id``.
        """
        self._collection().insert_one(dict(self.schema))

    def drop_col(self):
        """Drop the configured collection."""
        self._collection().drop()

    def add_record(self, data):
        """Insert one document, or every document of a list/tuple.

        Args:
            data: A single document, or a list/tuple of documents. An empty
                list/tuple is a no-op.
        """
        if isinstance(data, (list, tuple)):
            if data:  # insert_many rejects an empty batch
                self._collection().insert_many(list(data))
        else:
            self._collection().insert_one(data)

In [7]:
# Credentials are read from the environment, like the USDA key, so they never
# live in the notebook. NOTE: the password that used to be hard-coded here has
# been exposed and should be rotated.
mongo_instance = MongoDB(
    username=os.environ['mongo_username'],
    password=os.environ['mongo_password'],
    cluster_name='USDACluster',
    db_name='USDA',
    col_name='usda_subset',
    schema=default_schema,
)

mongo_instance.test_connectivity()

Pinged your deployment. You successfully connected to MongoDB!


In [8]:
# Insert the placeholder schema document so the database and collection get created.
mongo_instance.initialize()

In [10]:
def populate_usda():
    """Load Quick Stats records into MongoDB for every (year, commodity) pair.

    For each year from ``create_mongo_year_list()`` and each ``commodity_desc``
    value, the two filters are added to ``data``, the query is run, and the
    returned records are bulk-inserted into the collection configured on
    ``mongo_instance``. Requests that fail or return no records are skipped;
    a one-line summary is printed at the end.
    """
    years = create_mongo_year_list()
    # Loop-invariant: fetch the commodity list once, not once per year.
    commodities = data.get_param_values('commodity_desc')
    collection = mongo_instance.client[mongo_instance.db_name][mongo_instance.col_name]
    # Filters set before this function ran; restored after every request so
    # the year/commodity filters never accumulate across iterations.
    base_params = data.params

    inserted = 0
    skipped = []
    for year in years:
        for commodity_desc in commodities:
            try:
                data.add_params('year', year)
                data.add_params('commodity_desc', commodity_desc)
                result = data.call()
                # call() returns a message string (or None) on failure.
                records = result.get('data') if isinstance(result, dict) else None
                if records:
                    # The API returns a list of records, so insert them in bulk.
                    collection.insert_many(records)
                    inserted += len(records)
                else:
                    skipped.append((year, commodity_desc, result))
            except Exception as exc:
                # Best effort: remember the failure and continue with the next pair.
                skipped.append((year, commodity_desc, exc))
            finally:
                data.params = base_params

    print(f'populate_usda: inserted {inserted} records, skipped {len(skipped)} requests')

In [None]:
# Run the full load: one Quick Stats request per (year, commodity) pair.
populate_usda()