# Wrangling Soil Test data from University of Kentucky's Soil Lab

Use Microsoft Access to export the data to a CSV text file, with the FIPS code added and a query that selects each county by county name. Export as soildata_fips.txt.

#### import python libraries

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import warnings

In [2]:
from ipywidgets import widgets
from pandas_profiling import ProfileReport
# from pandas_profiling.utils.cache import cache_file

#### set file path to get data to work on

In [3]:
# Input directory (raw export) and output directory (derived CSV files).
filePath = Path('data')
fileOut = Path('project-data')
# Soil-test export produced from Access.
file_soil = filePath / 'soildata_fips.txt'

#### Read data into pandas

In [4]:
# Read every column as text; the numeric columns are cast explicitly later.
soil = pd.read_csv(file_soil, dtype='str')

#### Check that file is read into memory

In [5]:
# Print the column list, non-null counts and dtypes to confirm the load.
soil.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1190126 entries, 0 to 1190125
Data columns (total 14 columns):
FIPS_NO    1190126 non-null object
YEAR       1190126 non-null object
FM         1190052 non-null object
COUNTY     1190126 non-null object
AREA       1190126 non-null object
PH         1187607 non-null object
BUPH       1056246 non-null object
P          1187473 non-null object
K          1187494 non-null object
CA         969266 non-null object
MG         969725 non-null object
ZN         967041 non-null object
ACRES      525128 non-null object
CROP       1183431 non-null object
dtypes: object(14)
memory usage: 127.1+ MB


In [6]:
# Distinct YEAR values exactly as read from the file (text such as '1990.00').
year_dup = soil['YEAR'].unique()
year_dup

array(['1990.00', '1991.00', '1992.00', '1993.00', '1994.00', '1995.00',
       '1996.00', '1997.00', '1998.00', '1999.00', '2000.00', '2001.00',
       '2002.00', '2003.00', '2004.00', '2005.00', '2006.00', '2007.00',
       '2008.00', '2009.00', '2010.00', '2011.00', '2012.00', '2013.00',
       '2014.00', '2015.00', '2016.00', '2017.00', '2018.00', '2019.00'],
      dtype=object)

#### Remove the decimal suffix from FIPS_NO and YEAR but keep them as strings, because they become pivot-table column labels later in processing. Convert PH, BUPH, P, K, and ACRES to float.

In [7]:
# Work on a copy so the raw `soil` frame stays untouched.
df = soil.copy()

In [8]:
# FIPS_NO and YEAR arrive as text such as '1990.00'.  Strip only a trailing
# '.00' (raw, end-anchored pattern) and keep them as strings, since they are
# used as pivot-table column labels later on.
for text_col in ('FIPS_NO', 'YEAR'):
    df[text_col] = df[text_col].astype('str').replace(r'\.00$', '', regex=True)

# The measurements were read as text; cast them to float (missing stay NaN).
for numeric_col in ('PH', 'BUPH', 'P', 'K', 'ACRES'):
    df[numeric_col] = df[numeric_col].astype('float')

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1190126 entries, 0 to 1190125
Data columns (total 14 columns):
FIPS_NO    1190126 non-null object
YEAR       1190126 non-null object
FM         1190052 non-null object
COUNTY     1190126 non-null object
AREA       1190126 non-null object
PH         1187607 non-null float64
BUPH       1056246 non-null float64
P          1187473 non-null float64
K          1187494 non-null float64
CA         969266 non-null object
MG         969725 non-null object
ZN         967041 non-null object
ACRES      525128 non-null float64
CROP       1183431 non-null object
dtypes: float64(5), object(9)
memory usage: 127.1+ MB


In [9]:
# Re-check the dtypes, then look at the first and last rows.
df.info()
print(df.head())
df.tail()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1190126 entries, 0 to 1190125
Data columns (total 14 columns):
FIPS_NO    1190126 non-null object
YEAR       1190126 non-null object
FM         1190052 non-null object
COUNTY     1190126 non-null object
AREA       1190126 non-null object
PH         1187607 non-null float64
BUPH       1056246 non-null float64
P          1187473 non-null float64
K          1187494 non-null float64
CA         969266 non-null object
MG         969725 non-null object
ZN         967041 non-null object
ACRES      525128 non-null float64
CROP       1183431 non-null object
dtypes: float64(5), object(9)
memory usage: 127.1+ MB
  FIPS_NO  YEAR FM COUNTY                AREA    PH  BUPH      P      K  \
0       1  1990  A  ADAIR  Eastern Pennyroyal  7.15  7.23   28.0  158.0   
1       1  1990  A  ADAIR  Eastern Pennyroyal  6.95  7.22   88.0  134.0   
2       1  1990  A  ADAIR  Eastern Pennyroyal  6.26  6.94   70.0  256.0   
3       1  1990  A  ADAIR  Eastern Pennyro

Unnamed: 0,FIPS_NO,YEAR,FM,COUNTY,AREA,PH,BUPH,P,K,CA,MG,ZN,ACRES,CROP
1190121,239,2019,A,WOODFORD,Bluegrass,5.0,6.3,62.0,319.0,1489.0,223.0,3.5,1.0,Wildlife Food Plot
1190122,239,2019,A,WOODFORD,Bluegrass,5.9,6.7,46.0,257.0,5247.0,268.0,2.1,2.0,Wildlife Food Plot
1190123,239,2019,A,WOODFORD,Bluegrass,6.8,7.0,75.0,243.0,12047.0,281.0,1.2,2.0,Wildlife Food Plot
1190124,239,2019,A,WOODFORD,Bluegrass,5.3,6.6,60.0,407.0,3304.0,396.0,2.8,,Wildlife Food Plot
1190125,239,2019,A,WOODFORD,Bluegrass,5.0,6.3,59.0,377.0,4341.0,349.0,2.0,1.5,Wildlife Food Plot


#### Create profile report 

#### Drop CA, MG, ZN

In [10]:
# Discard the CA, MG and ZN columns.
df = df.drop(columns=['CA', 'MG', 'ZN'])

In [11]:
# Confirm that eleven columns remain after the drop.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1190126 entries, 0 to 1190125
Data columns (total 11 columns):
FIPS_NO    1190126 non-null object
YEAR       1190126 non-null object
FM         1190052 non-null object
COUNTY     1190126 non-null object
AREA       1190126 non-null object
PH         1187607 non-null float64
BUPH       1056246 non-null float64
P          1187473 non-null float64
K          1187494 non-null float64
ACRES      525128 non-null float64
CROP       1183431 non-null object
dtypes: float64(5), object(6)
memory usage: 99.9+ MB


#### Check the maximum and minimum values for P and K 

In [12]:
# Range check on both nutrients before filtering.
print("max P =", df['P'].max(), "min P =", df['P'].min())
print("max K", df['K'].max(), "min K =", df['K'].min())

max P = 21658.0 min P = -9.0
max K 60452.0 min K = -26.0


#### Remove P and K values less than zero or greater than or equal to 9999

In [13]:
# Flag readings outside [0, 9999) for either nutrient.  Missing values compare
# False here, so rows with NaN in P or K are kept at this stage.
out_of_range = (
    (df['P'] < 0)
    | (df['K'] < 0)
    | (df['P'] >= 9999)
    | (df['K'] >= 9999)
)
df = df[~out_of_range]


In [14]:
# Same range check again, now on the filtered data.
print("max P =", df['P'].max(), "min P =", df['P'].min())
print("max K", df['K'].max(), "min K =", df['K'].min())

max P = 9778.0 min P = 0.0
max K 9964.0 min K = 1.0


#### Select agricultural "A" and commercial "C" types from FM column. Append df together.

In [15]:
# Keep only agricultural ('A') and commercial ('C') samples.
df1 = df.loc[df['FM'] == 'A']
df2 = df.loc[df['FM'] == 'C']
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# builds the same stacked frame (A rows first, then C rows, fresh 0..n-1 index).
df3 = pd.concat([df1, df2], ignore_index=True)

In [16]:
# info() prints its report itself and returns None, so wrapping it in print()
# only adds a stray "None" line after each summary.
df1.info()
df2.info()
df3.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 941637 entries, 0 to 1190125
Data columns (total 11 columns):
FIPS_NO    941637 non-null object
YEAR       941637 non-null object
FM         941637 non-null object
COUNTY     941637 non-null object
AREA       941637 non-null object
PH         940288 non-null float64
BUPH       836405 non-null float64
P          940284 non-null float64
K          940295 non-null float64
ACRES      511570 non-null float64
CROP       938347 non-null object
dtypes: float64(5), object(6)
memory usage: 86.2+ MB
None
<class 'pandas.core.frame.DataFrame'>
Int64Index: 21910 entries, 153 to 1190012
Data columns (total 11 columns):
FIPS_NO    21910 non-null object
YEAR       21910 non-null object
FM         21910 non-null object
COUNTY     21910 non-null object
AREA       21910 non-null object
PH         21882 non-null float64
BUPH       20540 non-null float64
P          21881 non-null float64
K          21883 non-null float64
ACRES      7362 non-null float64
CROP

#### Drop null values from CROP, P, K.

In [17]:
# Discard, in place, every sample lacking a crop name or a P / K reading.
df3.dropna(subset=['CROP', 'P', 'K'], inplace=True)
df3.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 958826 entries, 0 to 963546
Data columns (total 11 columns):
FIPS_NO    958826 non-null object
YEAR       958826 non-null object
FM         958826 non-null object
COUNTY     958826 non-null object
AREA       958826 non-null object
PH         958813 non-null float64
BUPH       854104 non-null float64
P          958826 non-null float64
K          958826 non-null float64
ACRES      517097 non-null float64
CROP       958826 non-null object
dtypes: float64(5), object(6)
memory usage: 87.8+ MB


#### Re-sort and re-index the dataframe.

In [18]:
# Identifying columns first, then the measurements (FM is left out).
column_order = ['FIPS_NO', 'COUNTY', 'AREA', 'YEAR', 'CROP',
                'ACRES', 'PH', 'BUPH', 'P', 'K']
order_by_cols = ['FIPS_NO', 'YEAR', 'CROP']
# Sort ascending on every key and renumber the rows from zero.
# NOTE: FIPS_NO and YEAR are strings, so they sort lexicographically.
df = (
    df3[column_order]
    .sort_values(by=order_by_cols, ascending=True)
    .reset_index(drop=True)
)
df.head()

Unnamed: 0,FIPS_NO,COUNTY,AREA,YEAR,CROP,ACRES,PH,BUPH,P,K
0,1,ADAIR,Eastern Pennyroyal,1990,Alfalfa,18.0,7.15,7.23,28.0,158.0
1,1,ADAIR,Eastern Pennyroyal,1990,Alfalfa,15.0,6.95,7.22,88.0,134.0
2,1,ADAIR,Eastern Pennyroyal,1990,Alfalfa,16.0,6.26,6.94,70.0,256.0
3,1,ADAIR,Eastern Pennyroyal,1990,Alfalfa,6.0,5.67,6.69,161.0,611.0
4,1,ADAIR,Eastern Pennyroyal,1990,Alfalfa,25.0,7.26,7.47,105.0,315.0


#### Save clean CSV file 

In [19]:
# Build the path with pathlib instead of a hard-coded backslash: outside
# Windows, r'data\clean_soil_data.csv' is a single file name containing a
# backslash rather than a file inside the data/ directory.
df.to_csv(filePath.joinpath('clean_soil_data.csv'), index=False)

In [20]:
# Summary of the cleaned frame that was just written to disk.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 958826 entries, 0 to 958825
Data columns (total 10 columns):
FIPS_NO    958826 non-null object
COUNTY     958826 non-null object
AREA       958826 non-null object
YEAR       958826 non-null object
CROP       958826 non-null object
ACRES      517097 non-null float64
PH         958813 non-null float64
BUPH       854104 non-null float64
P          958826 non-null float64
K          958826 non-null float64
dtypes: float64(5), object(5)
memory usage: 73.2+ MB


#### Find unique CROP types. 

In [21]:
# Every distinct CROP label, in order of first appearance.
croptypes = df['CROP'].unique()
croptypes

array(['Alfalfa', 'Alfalfa/Cool Season', 'Burley Tobacco', 'Clover/Grass',
       'Cole Crops (broccoli, etc.)', 'Corn', 'Corn, Sweet', 'Cucumbers',
       'Fescue', 'No Info Given', 'Orchardgrass', 'Other Vegetables',
       'Peppers (bell & pimento)', 'Red Clover', 'Timothy', 'Tomatoes',
       'White Clover', 'White Clover/Grass', 'Rye', 'Soybeans',
       'Tobacco Beds', 'Wheat', 'Oats', 'Red Clover/Grass',
       'Warm Season Grass', 'Blueberries', 'Fescue/Lespedeza (multiple)',
       'Forage Sorghum', 'Strawberries', 'Cool Season Grass',
       'Evergreen Shrubs, Broadleaved', 'Sudangrass',
       'Timothy/Red Clover', 'Lespedeza', 'Other Fruit & Nuts',
       'Small Grains/Corn', 'Small Grains/Soybeans', 'Squash & Pumpkins',
       'Birdsfoot Trefoil', 'Grain Sorghum', 'Lespedeza/Grass', 'Annuals',
       'Fescue/Lespedeza', 'Forage Crops', 'Millet',
       'Orchardgrass/Red Clover', 'Apples', 'Grapes', 'Peaches',
       'Small Grains', 'Bermudagrass, common', 'Sweet Potatoes',

## Select CROP based on AGR-1 crop types.

## Corn

#### Create list to select Corn from database.

In [22]:
# CROP labels that count as corn, kept in alphabetical order.
corn_sel = sorted(['Corn', 'Small Grains/Corn', 'Wheat/Corn'])
print(corn_sel)

['Corn', 'Small Grains/Corn', 'Wheat/Corn']


#### Create dataframe for nutrients phosphorus (P) and potassium (K).

In [23]:
# Rows whose CROP is one of the corn labels.
is_corn = df['CROP'].isin(corn_sel)
df_corn = df[is_corn]
# Independent copy holding only the keys and the two nutrients.
nutrient_cols = ['FIPS_NO', 'COUNTY', 'YEAR', 'P', 'K']
df_corn_nu = df_corn[nutrient_cols].copy()
print(df_corn_nu.head())

    FIPS_NO COUNTY  YEAR     P      K
155       1  ADAIR  1990  37.0  146.0
156       1  ADAIR  1990  93.0  105.0
157       1  ADAIR  1990  25.0  252.0
158       1  ADAIR  1990  24.0  121.0
159       1  ADAIR  1990  92.0  283.0


### Calculate median for each County and year

In [24]:
# Median and sample count of P and K per county (rows) and year (columns),
# rounded to whole numbers; county/year pairs with no samples are filled with 0.
df_corn_median = df_corn_nu.pivot_table(
    index='COUNTY',
    columns=['YEAR'],
    values=['P', 'K'],
    aggfunc=(np.median, len),
    fill_value=0,
)
df_corn_median = np.round(df_corn_median, 0)
print(df_corn_median)
print(df_corn_median.columns)

# Collapse the (nutrient, statistic, year) column levels into flat names,
# then shorten them: medians become e.g. 'P_med1990', counts 'P_count_1990'.
df_corn_median.columns = ['_'.join(levels) for levels in df_corn_median.columns]
for old, new in (
    ("P_median_", "P_med"),
    ("P_len", "P_count"),
    ("K_median_", "K_med"),
    ("K_len", "K_count"),
):
    df_corn_median.columns = df_corn_median.columns.str.replace(old, new)
print(df_corn_median.columns)

# Turn COUNTY back into an ordinary column before writing.
df_corn_median = df_corn_median.reset_index()
print(df_corn_median)

file_out_median = fileOut / 'corn_median.csv'
df_corn_median.to_csv(file_out_median, index=False)
print('total number of records written to CSV:', '{:,}'.format(len(df_corn_median)), '\n')


            K                                               ...      P         \
          len                                               ... median          
YEAR     1990 1991 1992 1993 1994 1995 1996 1997 1998 1999  ...   2010   2011   
COUNTY                                                      ...                 
ADAIR      30   35   37   74   21   74   68   73   46   54  ...  108.0   62.0   
ALLEN      23   41   26   49   12   20    6    7   13   24  ...   55.0   50.0   
ANDERSON   15   30   10    6   10    8    8    5    1    8  ...  269.0  150.0   
BALLARD    81   65   65   62   56   87   81   40  158  126  ...   56.0   54.0   
BARREN    101  140  171  136  105  143   50   42   37   10  ...  104.0   62.0   
...       ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...    ...    ...   
WAYNE     110   62   85  114   95  141  114  171  191  149  ...   64.0   87.0   
WEBSTER   164  191  168   55   70   89   43   47  110  125  ...   58.0   42.0   
WHITLEY    15   15   11   25

#### Corn,  Set categories for P and K values to very low, low, medium, high, very high. Base values from AGR-1.

#### Categories for P
        Cat      Title       Break
        -------------------------------------
        VL       very low    P<= 5
        L        low         P>5 & P<=27
        M        medium      P>27 & P<=60
        H        high        P>60

#### Categories for K
        Cat      Title      Break
       --------------------------------------
        VL       very low   K< 100
        L        low        K>=100 & K <=190
        M        medium     K>=191 & K <=300
        H        high       K>=301 & K <=420
        VH       very high  K>420

In [25]:
# Phosphorus class for each corn sample:
#   VL: P <= 5,  L: 5 < P <= 27,  M: 27 < P <= 60,  H: P > 60.
# Rows matching none of the ranges (e.g. missing P) get the empty string.
corn_p = df_corn_nu['P']
df_corn_nu['CAT_P'] = np.select(
    [
        corn_p <= 5,
        (corn_p > 5) & (corn_p <= 27),
        (corn_p > 27) & (corn_p <= 60),
        corn_p > 60,
    ],
    ['VL', 'L', 'M', 'H'],
    default='',
)
df_corn_nu.head()

Unnamed: 0,FIPS_NO,COUNTY,YEAR,P,K,CAT_P
155,1,ADAIR,1990,37.0,146.0,M
156,1,ADAIR,1990,93.0,105.0,H
157,1,ADAIR,1990,25.0,252.0,L
158,1,ADAIR,1990,24.0,121.0,L
159,1,ADAIR,1990,92.0,283.0,H


In [26]:
# Potassium class for each corn sample, following the category table:
#   VL: K < 100,  L: 100 <= K <= 190,  M: 190 < K <= 300,
#   H: 300 < K <= 420,  VH: K > 420.
# The previous code used K <= 100 for VL, which put K == 100 in "very low"
# although the table (and the soybean section) place it in "low".
# np.select takes the first matching condition, so each bound only needs its
# upper limit.  Rows with missing K match nothing and get the empty string.
corn_k = df_corn_nu['K']
df_corn_nu['CAT_K'] = np.select(
    [
        corn_k < 100,
        corn_k <= 190,
        corn_k <= 300,
        corn_k <= 420,
        corn_k > 420,
    ],
    ['VL', 'L', 'M', 'H', 'VH'],
    default='',
)
df_corn_nu.head()

Unnamed: 0,FIPS_NO,COUNTY,YEAR,P,K,CAT_P,CAT_K
155,1,ADAIR,1990,37.0,146.0,M,L
156,1,ADAIR,1990,93.0,105.0,H,L
157,1,ADAIR,1990,25.0,252.0,L,M
158,1,ADAIR,1990,24.0,121.0,L,L
159,1,ADAIR,1990,92.0,283.0,H,M


#### Create pivot table to sort categories by year and County for each nutrient.

#### Get median value of P and K nutrient.

In [27]:
# NOTE: this silences every warning from here on, not just pandas' own.
warnings.filterwarnings("ignore")

# Median and sample count per county for every (year, category) pair,
# rounded to whole numbers; empty combinations are filled with 0.
corn_pivot_opts = dict(index='COUNTY', aggfunc=(np.median, len), fill_value=0)
df_corn_p = np.round(
    df_corn_nu.pivot_table(columns=['YEAR', 'CAT_P'], values=['P'], **corn_pivot_opts),
    0,
)
df_corn_k = np.round(
    df_corn_nu.pivot_table(columns=['YEAR', 'CAT_K'], values=['K'], **corn_pivot_opts),
    0,
)
print(df_corn_p.head())
print(df_corn_k.head())

            P                                     ...                         \
          len                                     ... median                   
YEAR     1990            1991           1992      ...   2017      2018         
CAT_P       H   L   M VL    H  L   M VL    H   L  ...      M VL      H     L   
COUNTY                                            ...                          
ADAIR      15   4  11  0   24  7   4  0   18   4  ...   44.0  0  140.0  20.0   
ALLEN       7  10   6  0   19  4  18  0    9   5  ...   36.0  0  116.0   0.0   
ANDERSON   10   3   1  1   18  3   9  0    9   1  ...    0.0  0  147.0   8.0   
BALLARD    69   0  12  0   33  4  28  0   51   2  ...   42.0  0   74.0   0.0   
BARREN     66   9  26  0   86  9  45  0  114  12  ...   58.0  0  108.0  24.0   

                                         
                                         
YEAR                2019                 
CAT_P        M VL      H     L     M VL  
COUNTY                         

## Unpivot table and save to CSV file

#### Create column names from pivot table data

In [28]:
# Show the four-level column index (nutrient, statistic, year, category)
# of both pivot tables before it is flattened.
print(df_corn_p.columns)
df_corn_k.columns

MultiIndex([('P',    'len', '1990',  'H'),
            ('P',    'len', '1990',  'L'),
            ('P',    'len', '1990',  'M'),
            ('P',    'len', '1990', 'VL'),
            ('P',    'len', '1991',  'H'),
            ('P',    'len', '1991',  'L'),
            ('P',    'len', '1991',  'M'),
            ('P',    'len', '1991', 'VL'),
            ('P',    'len', '1992',  'H'),
            ('P',    'len', '1992',  'L'),
            ...
            ('P', 'median', '2017',  'M'),
            ('P', 'median', '2017', 'VL'),
            ('P', 'median', '2018',  'H'),
            ('P', 'median', '2018',  'L'),
            ('P', 'median', '2018',  'M'),
            ('P', 'median', '2018', 'VL'),
            ('P', 'median', '2019',  'H'),
            ('P', 'median', '2019',  'L'),
            ('P', 'median', '2019',  'M'),
            ('P', 'median', '2019', 'VL')],
           names=[None, None, 'YEAR', 'CAT_P'], length=240)


MultiIndex([('K',    'len', '1990',  'H'),
            ('K',    'len', '1990',  'L'),
            ('K',    'len', '1990',  'M'),
            ('K',    'len', '1990', 'VH'),
            ('K',    'len', '1990', 'VL'),
            ('K',    'len', '1991',  'H'),
            ('K',    'len', '1991',  'L'),
            ('K',    'len', '1991',  'M'),
            ('K',    'len', '1991', 'VH'),
            ('K',    'len', '1991', 'VL'),
            ...
            ('K', 'median', '2018',  'H'),
            ('K', 'median', '2018',  'L'),
            ('K', 'median', '2018',  'M'),
            ('K', 'median', '2018', 'VH'),
            ('K', 'median', '2018', 'VL'),
            ('K', 'median', '2019',  'H'),
            ('K', 'median', '2019',  'L'),
            ('K', 'median', '2019',  'M'),
            ('K', 'median', '2019', 'VH'),
            ('K', 'median', '2019', 'VL')],
           names=[None, None, 'YEAR', 'CAT_K'], length=300)

In [29]:
# Join the levels of each column tuple with '_' to get flat string names,
# e.g. ('P', 'len', '1990', 'H') -> 'P_len_1990_H'.
df_corn_p.columns = ['_'.join(levels) for levels in df_corn_p.columns]
df_corn_k.columns = ['_'.join(levels) for levels in df_corn_k.columns]

In [30]:
# Check the flattened column names.
print(df_corn_p.columns)
print(df_corn_k.columns)

Index(['P_len_1990_H', 'P_len_1990_L', 'P_len_1990_M', 'P_len_1990_VL',
       'P_len_1991_H', 'P_len_1991_L', 'P_len_1991_M', 'P_len_1991_VL',
       'P_len_1992_H', 'P_len_1992_L',
       ...
       'P_median_2017_M', 'P_median_2017_VL', 'P_median_2018_H',
       'P_median_2018_L', 'P_median_2018_M', 'P_median_2018_VL',
       'P_median_2019_H', 'P_median_2019_L', 'P_median_2019_M',
       'P_median_2019_VL'],
      dtype='object', length=240)
Index(['K_len_1990_H', 'K_len_1990_L', 'K_len_1990_M', 'K_len_1990_VH',
       'K_len_1990_VL', 'K_len_1991_H', 'K_len_1991_L', 'K_len_1991_M',
       'K_len_1991_VH', 'K_len_1991_VL',
       ...
       'K_median_2018_H', 'K_median_2018_L', 'K_median_2018_M',
       'K_median_2018_VH', 'K_median_2018_VL', 'K_median_2019_H',
       'K_median_2019_L', 'K_median_2019_M', 'K_median_2019_VH',
       'K_median_2019_VL'],
      dtype='object', length=300)


In [31]:
# Shorten the names: drop 'median_' from the median columns and call the
# 'len' columns 'count' (e.g. 'P_median_2019_H' -> 'P_2019_H').
for old, new in (("P_median_", "P_"), ("P_len", "P_count")):
    df_corn_p.columns = df_corn_p.columns.str.replace(old, new)
for old, new in (("K_median_", "K_"), ("K_len", "K_count")):
    df_corn_k.columns = df_corn_k.columns.str.replace(old, new)
print(df_corn_p.columns)
print(df_corn_k.columns)

Index(['P_count_1990_H', 'P_count_1990_L', 'P_count_1990_M', 'P_count_1990_VL',
       'P_count_1991_H', 'P_count_1991_L', 'P_count_1991_M', 'P_count_1991_VL',
       'P_count_1992_H', 'P_count_1992_L',
       ...
       'P_2017_M', 'P_2017_VL', 'P_2018_H', 'P_2018_L', 'P_2018_M',
       'P_2018_VL', 'P_2019_H', 'P_2019_L', 'P_2019_M', 'P_2019_VL'],
      dtype='object', length=240)
Index(['K_count_1990_H', 'K_count_1990_L', 'K_count_1990_M', 'K_count_1990_VH',
       'K_count_1990_VL', 'K_count_1991_H', 'K_count_1991_L', 'K_count_1991_M',
       'K_count_1991_VH', 'K_count_1991_VL',
       ...
       'K_2018_H', 'K_2018_L', 'K_2018_M', 'K_2018_VH', 'K_2018_VL',
       'K_2019_H', 'K_2019_L', 'K_2019_M', 'K_2019_VH', 'K_2019_VL'],
      dtype='object', length=300)


#### Reindex unpivot table 

In [32]:
# Move COUNTY from the index into an ordinary column of each table.
df_corn_p = df_corn_p.reset_index()
df_corn_k = df_corn_k.reset_index()
print(df_corn_p.head())
print(df_corn_k.head())

     COUNTY  P_count_1990_H  P_count_1990_L  P_count_1990_M  P_count_1990_VL  \
0     ADAIR              15               4              11                0   
1     ALLEN               7              10               6                0   
2  ANDERSON              10               3               1                1   
3   BALLARD              69               0              12                0   
4    BARREN              66               9              26                0   

   P_count_1991_H  P_count_1991_L  P_count_1991_M  P_count_1991_VL  \
0              24               7               4                0   
1              19               4              18                0   
2              18               3               9                0   
3              33               4              28                0   
4              86               9              45                0   

   P_count_1992_H  ...  P_2017_M  P_2017_VL  P_2018_H  P_2018_L  P_2018_M  \
0              18  ..

### Merge dataframes into one file 

In [33]:
# Inner-join the P and K tables on COUNTY (one row per county).
corn_level = pd.merge(df_corn_p, df_corn_k, on='COUNTY')
print(corn_level)

       COUNTY  P_count_1990_H  P_count_1990_L  P_count_1990_M  \
0       ADAIR              15               4              11   
1       ALLEN               7              10               6   
2    ANDERSON              10               3               1   
3     BALLARD              69               0              12   
4      BARREN              66               9              26   
..        ...             ...             ...             ...   
115     WAYNE              69              10              31   
116   WEBSTER              54              41              69   
117   WHITLEY               4               4               7   
118     WOLFE              13               1               6   
119  WOODFORD              25               3               3   

     P_count_1990_VL  P_count_1991_H  P_count_1991_L  P_count_1991_M  \
0                  0              24               7               4   
1                  0              19               4              18   
2  

#### Save categorized data to file. Separate by crop and nutrient type (P and K) with count by category.

In [34]:
# Write the merged corn category table and report how many rows it has.
file_out_level = fileOut / 'corn_levels.csv'
corn_level.to_csv(file_out_level, index=False)
print('total number of records written to CSV:', '{:,}'.format(len(corn_level)), '\n')

total number of records written to CSV: 120 



## Soybeans

#### Create list to select Soybeans from database.

In [35]:
# CROP labels that count as soybeans (sole crop or double-cropped),
# kept in alphabetical order.
soy_sel = sorted([
    'Soybeans',
    'Small Grains/Soybeans',
    'Wheat/Soybeans',
    'Canola/Soybeans',
    'Rye/Soybeans',
    'Oats/Soybeans',
    'Barley/Soybeans',
    'Triticale/Soybeans',
])
print(soy_sel)

['Barley/Soybeans', 'Canola/Soybeans', 'Oats/Soybeans', 'Rye/Soybeans', 'Small Grains/Soybeans', 'Soybeans', 'Triticale/Soybeans', 'Wheat/Soybeans']


#### Select soybeans from dataset.

In [36]:
# Rows whose CROP is one of the soybean labels.
is_soy = df['CROP'].isin(soy_sel)
df_soy = df[is_soy]
# Independent copy holding only the keys and the two nutrients.
nutrient_cols = ['FIPS_NO', 'COUNTY', 'YEAR', 'P', 'K']
df_soy_nu = df_soy[nutrient_cols].copy()
print(df_soy_nu.head())

     FIPS_NO COUNTY  YEAR      P      K
628        1  ADAIR  1991  238.0  318.0
1879       1  ADAIR  1995   83.0  173.0
1880       1  ADAIR  1995   59.0  150.0
1881       1  ADAIR  1995   65.0  152.0
1882       1  ADAIR  1995  148.0  317.0


### Calculate median for each year by County

In [37]:
# Median and sample count of P and K per county (rows) and year (columns),
# rounded to whole numbers; county/year pairs with no samples are filled with 0.
df_soy_median = df_soy_nu.pivot_table(
    index='COUNTY',
    columns=['YEAR'],
    values=['P', 'K'],
    aggfunc=(np.median, len),
    fill_value=0,
)
df_soy_median = np.round(df_soy_median, 0)
print(df_soy_median)
print(df_soy_median.columns)

# Collapse the (nutrient, statistic, year) column levels into flat names,
# then shorten them: medians become e.g. 'P_med1990', counts 'P_count_1990'.
df_soy_median.columns = ['_'.join(levels) for levels in df_soy_median.columns]
for old, new in (
    ("P_median_", "P_med"),
    ("P_len", "P_count"),
    ("K_median_", "K_med"),
    ("K_len", "K_count"),
):
    df_soy_median.columns = df_soy_median.columns.str.replace(old, new)
print(df_soy_median.columns)

# Turn COUNTY back into an ordinary column before writing.
df_soy_median = df_soy_median.reset_index()
print(df_soy_median)

file_out_median = fileOut / 'soy_median.csv'
df_soy_median.to_csv(file_out_median, index=False)
print('total number of records written to CSV:', '{:,}'.format(len(df_soy_median)), '\n')

            K                                               ...      P         \
          len                                               ... median          
YEAR     1990 1991 1992 1993 1994 1995 1996 1997 1998 1999  ...   2010   2011   
COUNTY                                                      ...                 
ADAIR       0    1    0    0    0    4    0    2    5    0  ...   86.0   88.0   
ALLEN       5    3    3   15    0   21   11    2    5    2  ...  127.0   43.0   
ANDERSON    0   12    1    1    1    0    0    0    0    2  ...  290.0  367.0   
BALLARD    58   36   11   58   30   54   59   89  111  124  ...   50.0   62.0   
BARREN     16   26   30   14   22   19   15   13    5    0  ...  234.0   85.0   
...       ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...    ...    ...   
WAYNE      69   39   50   60   44  105   35   62   77   59  ...   80.0   95.0   
WEBSTER    40  102   58   41   63  106   30   44   64  102  ...   69.0   36.0   
WHITLEY     1    2    4    1

#### Soybeans, Set categories for P and K values to very low, low, medium, high, very high. Base values from AGR-1.

#### Categories for P
        Cat      Title       Break
        -------------------------------------
        VL       very low    P<= 5
        L        low         P>5 & P<=27
        M        medium      P>27 & P<=60
        H        high        P>60

#### Categories for K
        Cat      Title      Break
       --------------------------------------
        VL       very low   K< 100
        L        low        K>=100 & K <=190
        M        medium     K>=191 & K <=300
        H        high       K>300

In [38]:
# Phosphorus class for each soybean sample:
#   VL: P <= 5,  L: 5 < P <= 27,  M: 27 < P <= 60,  H: P > 60.
# Rows matching none of the ranges (e.g. missing P) get the empty string.
soy_p = df_soy_nu['P']
df_soy_nu['CAT_P'] = np.select(
    [
        soy_p <= 5,
        (soy_p > 5) & (soy_p <= 27),
        (soy_p > 27) & (soy_p <= 60),
        soy_p > 60,
    ],
    ['VL', 'L', 'M', 'H'],
    default='',
)

# Potassium class for each soybean sample:
#   VL: K <= 99,  L: 99 < K <= 190,  M: 190 < K <= 300,  H: K > 300.
soy_k = df_soy_nu['K']
df_soy_nu['CAT_K'] = np.select(
    [
        soy_k <= 99,
        (soy_k > 99) & (soy_k <= 190),
        (soy_k > 190) & (soy_k <= 300),
        soy_k > 300,
    ],
    ['VL', 'L', 'M', 'H'],
    default='',
)


In [39]:
# NOTE: this silences every warning from here on, not just pandas' own.
warnings.filterwarnings("ignore")

# Median and sample count per county for every (year, category) pair,
# rounded to two decimals; empty combinations are filled with 0.
soy_pivot_opts = dict(index='COUNTY', aggfunc=(np.median, len), fill_value=0)
df_soy_p = np.round(
    df_soy_nu.pivot_table(columns=['YEAR', 'CAT_P'], values=['P'], **soy_pivot_opts),
    2,
)
df_soy_k = np.round(
    df_soy_nu.pivot_table(columns=['YEAR', 'CAT_K'], values=['K'], **soy_pivot_opts),
    2,
)

In [40]:
# Flatten the (nutrient, statistic, year, category) column levels into names
# joined with '_', then shorten them: medians become e.g. 'P_1990_H' and
# sample counts 'P_count_1990_H'.
df_soy_p.columns = ['_'.join(levels) for levels in df_soy_p.columns]
df_soy_k.columns = ['_'.join(levels) for levels in df_soy_k.columns]
for old, new in (("P_median_", "P_"), ("P_len", "P_count")):
    df_soy_p.columns = df_soy_p.columns.str.replace(old, new)
for old, new in (("K_median_", "K_"), ("K_len", "K_count")):
    df_soy_k.columns = df_soy_k.columns.str.replace(old, new)

# COUNTY becomes an ordinary column so the two tables can be joined on it.
df_soy_p = df_soy_p.reset_index()
df_soy_k = df_soy_k.reset_index()
soy_level = pd.merge(df_soy_p, df_soy_k, on='COUNTY')

# Write the merged soybean category table and report its row count.
file_out_level = fileOut / 'soy_levels.csv'
soy_level.to_csv(file_out_level, index=False)
print('total number of records written to CSV:', '{:,}'.format(len(soy_level)), '\n')

total number of records written to CSV: 117 



## Canola

#### Create list to select Canola from database.

In [41]:
# CROP labels that count as canola, kept in alphabetical order.
canola_sel = sorted(['Canola', 'Canola/Soybeans'])
print(canola_sel)

['Canola', 'Canola/Soybeans']


#### Select Canola from dataset.

In [42]:
# Rows whose CROP is one of the canola labels.
is_canola = df['CROP'].isin(canola_sel)
df_canola = df[is_canola]
# Independent copy holding only the keys and the two nutrients.
nutrient_cols = ['FIPS_NO', 'COUNTY', 'YEAR', 'P', 'K']
df_canola_nu = df_canola[nutrient_cols].copy()
print(df_canola_nu.head())

      FIPS_NO     COUNTY  YEAR     P      K
14175     101  HENDERSON  1991  29.0  200.0
14176     101  HENDERSON  1991  36.0  163.0
14177     101  HENDERSON  1991  43.0  250.0
14178     101  HENDERSON  1991  25.0  163.0
14179     101  HENDERSON  1991  36.0  158.0


### Calculate median for each year by County

In [43]:
# Median and sample count of P and K per county (rows) and year (columns),
# rounded to whole numbers; county/year pairs with no samples are filled with 0.
df_canola_median = df_canola_nu.pivot_table(
    index='COUNTY',
    columns=['YEAR'],
    values=['P', 'K'],
    aggfunc=(np.median, len),
    fill_value=0,
)
df_canola_median = np.round(df_canola_median, 0)
print(df_canola_median)
print(df_canola_median.columns)

# Collapse the (nutrient, statistic, year) column levels into flat names,
# then shorten them: medians become e.g. 'P_med1990', counts 'P_count_1990'.
df_canola_median.columns = ['_'.join(levels) for levels in df_canola_median.columns]
for old, new in (
    ("P_median_", "P_med"),
    ("P_len", "P_count"),
    ("K_median_", "K_med"),
    ("K_len", "K_count"),
):
    df_canola_median.columns = df_canola_median.columns.str.replace(old, new)
print(df_canola_median.columns)

# Turn COUNTY back into an ordinary column before writing.
df_canola_median = df_canola_median.reset_index()
print(df_canola_median)

file_out_median = fileOut / 'canola_median.csv'
df_canola_median.to_csv(file_out_median, index=False)
print('total number of records written to CSV:', '{:,}'.format(len(df_canola_median)), '\n')

              K                                               ...      P       \
            len                                               ... median        
YEAR       1990 1991 1992 1993 1994 1995 1996 1997 1998 1999  ...   2010 2011   
COUNTY                                                        ...               
ALLEN         0    0    0    0    0    0    0    0    0    0  ...      0  0.0   
BARREN        0    2    0    0    0    0    0    0    0    0  ...      0  0.0   
BOONE         0    0    0    0    2    0    0    0    0    0  ...      0  0.0   
BOURBON       1    0    0    0    0    0    0    0    0    0  ...      0  0.0   
BOYLE         0    0    0    0    0    0    0    0    1    0  ...      0  0.0   
...         ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...    ...  ...   
WARREN        0    1    0    0    1    0    0    0    0    0  ...      0  0.0   
WASHINGTON    0    0    0    0    0    0    0    0    1    0  ...      0  0.0   
WAYNE         0    0    0   

#### Canola, Set categories for P and K values to very low, low, medium, high, very high. Base values from AGR-1.

#### Categories for P
    Cat      Title       Break
    -------------------------------------
    VL       very low    P< 10
    L        low         P>= 10 & P<=30
    M        medium      P>30 & P<=60
    H        high        P>60
    
#### Categories for K
    Cat      Title      Break
   --------------------------------------
    VL       very low   K< 104
    L        low        K>=104 & K <=186
    M        medium     K>=187 & K <=300
    H        high       K>300

In [44]:
# Phosphorus class for each canola sample, following the category table:
#   VL: P < 10,  L: 10 <= P <= 30,  M: 30 < P <= 60,  H: P > 60.
# The previous code tested P > 10 for "low", so a reading of exactly 10 fell
# between VL and L and kept the empty category.  np.select takes the first
# matching condition, so the ranges below have no gaps.  Rows with a missing
# reading match nothing and get the empty string.
canola_p = df_canola_nu['P']
df_canola_nu['CAT_P'] = np.select(
    [
        canola_p < 10,
        canola_p <= 30,
        canola_p <= 60,
        canola_p > 60,
    ],
    ['VL', 'L', 'M', 'H'],
    default='',
)

# Potassium class for each canola sample:
#   VL: K < 104,  L: 104 <= K <= 186,  M: 186 < K <= 300,  H: K > 300.
# Same fix as for P: a reading of exactly 104 now lands in "low" instead of
# keeping the empty category.
canola_k = df_canola_nu['K']
df_canola_nu['CAT_K'] = np.select(
    [
        canola_k < 104,
        canola_k <= 186,
        canola_k <= 300,
        canola_k > 300,
    ],
    ['VL', 'L', 'M', 'H'],
    default='',
)

In [45]:
# Install a global "ignore" filter so later warnings are not displayed.
warnings.filterwarnings("ignore")

# One table per nutrient: rows are counties, columns are (YEAR, category),
# cells hold the median and the row count (len) of the nutrient value.
# Empty cells are filled with 0 and the result is rounded to 2 decimals.
df_canola_p = np.round(
    df_canola_nu.pivot_table(
        values=['P'],
        index='COUNTY',
        columns=['YEAR', 'CAT_P'],
        aggfunc=(np.median, len),
        fill_value=0,
    ),
    2,
)
df_canola_k = np.round(
    df_canola_nu.pivot_table(
        values=['K'],
        index='COUNTY',
        columns=['YEAR', 'CAT_K'],
        aggfunc=(np.median, len),
        fill_value=0,
    ),
    2,
)

In [46]:
# Flatten the multi-level pivot columns into single underscore-joined names.
df_canola_p.columns = ["_".join(levels) for levels in df_canola_p.columns]
df_canola_k.columns = ["_".join(levels) for levels in df_canola_k.columns]

# Shorten the names: drop "median_" and spell "len" as "count".
df_canola_p.columns = (
    df_canola_p.columns
    .str.replace("P_median_", "P_")
    .str.replace("P_len", "P_count")
)
df_canola_k.columns = (
    df_canola_k.columns
    .str.replace("K_median_", "K_")
    .str.replace("K_len", "K_count")
)

# Turn COUNTY back into an ordinary column so it can serve as the merge key.
df_canola_p = df_canola_p.reset_index()
df_canola_k = df_canola_k.reset_index()

# Put the P and K tables side by side, one row per county.
canola_level = df_canola_p.merge(df_canola_k, on='COUNTY')

file_out_level = fileOut / 'canola_levels.csv'
canola_level.to_csv(file_out_level, index=False)
print('total number of records written to CSV:', f'{len(canola_level):,}', '\n')

total number of records written to CSV: 65 



## Sorghum

#### Create list to select Sorghum from database.

In [47]:
# CROP values that count as sorghum, kept in sorted order.
sorghum_sel = sorted(['Grain Sorghum'])
print(sorghum_sel)

['Grain Sorghum']


#### Select Sorghum from dataset.

In [48]:
# Keep only the rows whose CROP is in the sorghum list, then take an
# independent copy of the five columns used by the later cells.
df_sorghum = df.loc[df['CROP'].isin(sorghum_sel)]
df_sorghum_nu = df_sorghum[['FIPS_NO', 'COUNTY', 'YEAR', 'P', 'K']].copy()
print(df_sorghum_nu.head())

      FIPS_NO     COUNTY  YEAR      P       K
2237        1      ADAIR  1996   67.0   284.0
2532        1      ADAIR  1997  318.0   303.0
18439     101  HENDERSON  2007   42.0  1281.0
18440     101  HENDERSON  2007  120.0  1499.0
19885     101  HENDERSON  2014   48.0   110.0


### Calculate median for each year by County

In [49]:
# Per-county, per-year median and row count (len) of P and K for sorghum.
# Empty cells are filled with 0 and the result is rounded to whole numbers.
df_sorghum_median = np.round(
    df_sorghum_nu.pivot_table(
        values=['P', 'K'],
        index='COUNTY',
        columns=['YEAR'],
        aggfunc=(np.median, len),
        fill_value=0,
    ),
    0,
)
print(df_sorghum_median)
print(df_sorghum_median.columns)

# Flatten the multi-level columns into underscore-joined names, then shorten.
df_sorghum_median.columns = ["_".join(levels) for levels in df_sorghum_median.columns]
df_sorghum_median.columns = (
    df_sorghum_median.columns
    .str.replace("P_median_", "P_med")
    .str.replace("P_len", "P_count")
    .str.replace("K_median_", "K_med")
    .str.replace("K_len", "K_count")
)
print(df_sorghum_median.columns)

# COUNTY becomes an ordinary column so it is written out with index=False.
df_sorghum_median = df_sorghum_median.reset_index()
print(df_sorghum_median)
file_out_median = fileOut / 'sorghum_median.csv'  # path and filename
df_sorghum_median.to_csv(file_out_median, index=False)  # output to csv
print('total number of records written to CSV:', f'{len(df_sorghum_median):,}', '\n')

              K                                               ...      P  \
            len                                               ... median   
YEAR       1990 1991 1992 1993 1994 1995 1996 1997 1998 1999  ...   2010   
COUNTY                                                        ...          
ADAIR         0    0    0    0    0    0    1    1    0    0  ...    0.0   
ALLEN         0    0    0    0    0    0    0    0    0    0  ...    0.0   
BALLARD      11    0    5    3    6    7    1    5    8    0  ...    0.0   
BATH          0    0    0    0    0    0    0    0    0    0  ...   15.0   
BELL          0    0    0    0    0    0    1    0    0    0  ...    0.0   
...         ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...    ...   
TRIGG         0    0    1    0    0    0    0    0    0    2  ...  141.0   
UNION         0    1    0    0    0    0    0    0    0    0  ...    0.0   
WASHINGTON    0    0    5    0    1    2    0    0    0    0  ...    0.0   
WEBSTER     

#### Sorghum, Set categories for P and K values to very low, low, medium, high, very high. Base values from AGR-1.

#### Categories for P
    Cat      Title       Break
    -------------------------------------
    VL       very low    P< 6
    L        low         P>= 6 & P<=27
    M        medium      P>27 & P<=60
    H        high        P>60
    
#### Categories for K
    Cat      Title      Break
   --------------------------------------
    VL       very low   K< 100
    L        low        K>=100 & K <=190
    M        medium     K>=191 & K <=300
    H        high       K>300

In [50]:
# Label every sorghum sample with its P and K category (VL / L / M / H).
# Each column starts as '' and is overwritten only where a band matches, so
# rows matching no band (e.g. a missing value) keep the empty label.
df_sorghum_nu['CAT_P'] = ''
df_sorghum_nu.loc[df_sorghum_nu.P < 6, 'CAT_P'] = 'VL'
df_sorghum_nu.loc[(df_sorghum_nu.P >= 6) & (df_sorghum_nu.P <= 27), 'CAT_P'] = 'L'
df_sorghum_nu.loc[(df_sorghum_nu.P > 27) & (df_sorghum_nu.P <= 60), 'CAT_P'] = 'M'
df_sorghum_nu.loc[df_sorghum_nu.P > 60, 'CAT_P'] = 'H'

df_sorghum_nu['CAT_K'] = ''
df_sorghum_nu.loc[df_sorghum_nu.K < 100, 'CAT_K'] = 'VL'
df_sorghum_nu.loc[(df_sorghum_nu.K >= 100) & (df_sorghum_nu.K <= 190), 'CAT_K'] = 'L'
df_sorghum_nu.loc[(df_sorghum_nu.K > 190) & (df_sorghum_nu.K <= 300), 'CAT_K'] = 'M'
df_sorghum_nu.loc[df_sorghum_nu.K > 300, 'CAT_K'] = 'H'

In [51]:
# Install a global "ignore" filter so later warnings are not displayed.
warnings.filterwarnings("ignore")

# County rows by (YEAR, category) columns: median and row count (len) of the
# nutrient value, empty cells filled with 0, rounded to 2 decimals.
df_sorghum_p = np.round(
    df_sorghum_nu.pivot_table(
        values=['P'],
        index='COUNTY',
        columns=['YEAR', 'CAT_P'],
        aggfunc=(np.median, len),
        fill_value=0,
    ),
    2,
)
df_sorghum_k = np.round(
    df_sorghum_nu.pivot_table(
        values=['K'],
        index='COUNTY',
        columns=['YEAR', 'CAT_K'],
        aggfunc=(np.median, len),
        fill_value=0,
    ),
    2,
)

# Flatten the multi-level columns into underscore-joined names, then shorten.
df_sorghum_p.columns = ["_".join(levels) for levels in df_sorghum_p.columns]
df_sorghum_k.columns = ["_".join(levels) for levels in df_sorghum_k.columns]
df_sorghum_p.columns = (
    df_sorghum_p.columns
    .str.replace("P_median_", "P_")
    .str.replace("P_len", "P_count")
)
df_sorghum_k.columns = (
    df_sorghum_k.columns
    .str.replace("K_median_", "K_")
    .str.replace("K_len", "K_count")
)

# COUNTY becomes an ordinary column so it can serve as the merge key.
df_sorghum_p = df_sorghum_p.reset_index()
df_sorghum_k = df_sorghum_k.reset_index()

# Put the P and K tables side by side, one row per county.
sorghum_level = df_sorghum_p.merge(df_sorghum_k, on='COUNTY')

file_out_level = fileOut / 'sorghum_levels.csv'
sorghum_level.to_csv(file_out_level, index=False)
print('total number of records written to CSV:', f'{len(sorghum_level):,}', '\n')


total number of records written to CSV: 73 



## Small Grains

#### Create list to select Small Grains from database.

In [52]:
# CROP values that count as small grains, kept in sorted order.
smallgrains_sel = sorted([
    'Barley',
    'Barley/Soybeans',
    'Grain Crops (multiple)',
    'Oats',
    'Oats/Soybeans',
    'Rye/Soybeans',
    'Small Grains',
    'Small Grains/Corn',
    'Small Grains/Soybeans',
    'Triticale',
    'Triticale/Soybeans',
    'Wheat',
    'Wheat/Corn',
    'Wheat/Soybeans',
])
print(smallgrains_sel)

['Barley', 'Barley/Soybeans', 'Grain Crops (multiple)', 'Oats', 'Oats/Soybeans', 'Rye/Soybeans', 'Small Grains', 'Small Grains/Corn', 'Small Grains/Soybeans', 'Triticale', 'Triticale/Soybeans', 'Wheat', 'Wheat/Corn', 'Wheat/Soybeans']


#### Select Small Grains from dataset.

In [53]:
# Keep only the rows whose CROP is in the small-grains list, then take an
# independent copy of the five columns used by the later cells.
df_smallgrains = df.loc[df['CROP'].isin(smallgrains_sel)]
df_smallgrains_nu = df_smallgrains[['FIPS_NO', 'COUNTY', 'YEAR', 'P', 'K']].copy()
print(df_smallgrains_nu.head())

     FIPS_NO COUNTY  YEAR     P      K
635        1  ADAIR  1991  16.0  234.0
901        1  ADAIR  1992  51.0  203.0
969        1  ADAIR  1992  29.0  152.0
970        1  ADAIR  1992  15.0  193.0
1333       1  ADAIR  1993  16.0   94.0


### Calculate median for each year by County.

In [54]:
# Per-county, per-year median and row count (len) of P and K for small grains.
# Empty cells are filled with 0 and the result is rounded to whole numbers.
df_smallgrains_median = np.round(
    df_smallgrains_nu.pivot_table(
        values=['P', 'K'],
        index='COUNTY',
        columns=['YEAR'],
        aggfunc=(np.median, len),
        fill_value=0,
    ),
    0,
)
print(df_smallgrains_median)
print(df_smallgrains_median.columns)

# Flatten the multi-level columns into underscore-joined names, then shorten.
df_smallgrains_median.columns = ["_".join(levels) for levels in df_smallgrains_median.columns]
df_smallgrains_median.columns = (
    df_smallgrains_median.columns
    .str.replace("P_median_", "P_med")
    .str.replace("P_len", "P_count")
    .str.replace("K_median_", "K_med")
    .str.replace("K_len", "K_count")
)
print(df_smallgrains_median.columns)

# COUNTY becomes an ordinary column so it is written out with index=False.
df_smallgrains_median = df_smallgrains_median.reset_index()
print(df_smallgrains_median)
file_out_median = fileOut / 'smallgrains_median.csv'  # path and filename
df_smallgrains_median.to_csv(file_out_median, index=False)  # output to csv
print('total number of records written to CSV:', f'{len(df_smallgrains_median):,}', '\n')


            K                                               ...      P         \
          len                                               ... median          
YEAR     1990 1991 1992 1993 1994 1995 1996 1997 1998 1999  ...   2010   2011   
COUNTY                                                      ...                 
ADAIR       0    1    3    1    1   11   19   18    6    9  ...   92.0  202.0   
ALLEN      15    5    0   10    6    7    3    4    0    2  ...   39.0   53.0   
ANDERSON    0    1    1    0    0    0    1    0    0    0  ...   62.0    0.0   
BALLARD    35   11    5   13   19   13   22   23   31   46  ...  186.0    0.0   
BARREN     11    6   11    7   33   41    7    6    0    5  ...  246.0    0.0   
...       ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...    ...    ...   
WAYNE      24   17   10   31   29   21   30   30   54   30  ...  137.0  151.0   
WEBSTER    79  195  117   50   50   66   13   11    2   34  ...    0.0    0.0   
WHITLEY     0    2    0    0

#### Small Grains, Set categories for P and K values to very low, low, medium, high, very high. Base values from AGR-1.

#### Categories for P
    Cat      Title       Break
    -------------------------------------
    VL       very low    P< 10
    L        low         P>= 10 & P<=30
    M        medium      P>30 & P<=60
    H        high        P>60
    
#### Categories for K
    Cat      Title      Break
   --------------------------------------
    VL       very low   K< 104
    L        low        K>=104 & K <=186
    M        medium     K>=187 & K <=300
    H        high       K>300

In [55]:
# Label every small-grains sample with its soil-test category for P and K
# (VL / L / M / H), using the break points from the tables above this cell.
# np.select returns the choice of the first condition that is True, so each
# band is written as a cascade of upper bounds.  That keeps the bands
# contiguous: P == 10 lands in 'L', as the table specifies (P>=10).
# Rows for which every comparison is False (e.g. a missing P or K) keep the
# default empty-string label.
df_smallgrains_nu['CAT_P'] = np.select(
    [
        df_smallgrains_nu.P < 10,
        df_smallgrains_nu.P <= 30,
        df_smallgrains_nu.P <= 60,
        df_smallgrains_nu.P > 60,
    ],
    ['VL', 'L', 'M', 'H'],
    default='',
)

df_smallgrains_nu['CAT_K'] = np.select(
    [
        df_smallgrains_nu.K < 104,
        df_smallgrains_nu.K <= 186,
        df_smallgrains_nu.K <= 300,
        df_smallgrains_nu.K > 300,
    ],
    ['VL', 'L', 'M', 'H'],
    default='',
)

In [56]:
# Install a global "ignore" filter so later warnings are not displayed.
warnings.filterwarnings("ignore")

# County rows by (YEAR, category) columns: median and row count (len) of the
# nutrient value, empty cells filled with 0, rounded to 2 decimals.
df_smallgrains_p = np.round(
    df_smallgrains_nu.pivot_table(
        values=['P'],
        index='COUNTY',
        columns=['YEAR', 'CAT_P'],
        aggfunc=(np.median, len),
        fill_value=0,
    ),
    2,
)
df_smallgrains_k = np.round(
    df_smallgrains_nu.pivot_table(
        values=['K'],
        index='COUNTY',
        columns=['YEAR', 'CAT_K'],
        aggfunc=(np.median, len),
        fill_value=0,
    ),
    2,
)

# Flatten the multi-level columns into underscore-joined names, then shorten.
df_smallgrains_p.columns = ["_".join(levels) for levels in df_smallgrains_p.columns]
df_smallgrains_k.columns = ["_".join(levels) for levels in df_smallgrains_k.columns]
df_smallgrains_p.columns = (
    df_smallgrains_p.columns
    .str.replace("P_median_", "P_")
    .str.replace("P_len", "P_count")
)
df_smallgrains_k.columns = (
    df_smallgrains_k.columns
    .str.replace("K_median_", "K_")
    .str.replace("K_len", "K_count")
)

# COUNTY becomes an ordinary column so it can serve as the merge key.
df_smallgrains_p = df_smallgrains_p.reset_index()
df_smallgrains_k = df_smallgrains_k.reset_index()

# Put the P and K tables side by side, one row per county.
smallgrains_level = df_smallgrains_p.merge(df_smallgrains_k, on='COUNTY')

file_out_level = fileOut / 'smallgrains_levels.csv'
smallgrains_level.to_csv(file_out_level, index=False)
print('total number of records written to CSV:', f'{len(smallgrains_level):,}', '\n')


total number of records written to CSV: 119 



## Tobacco

#### Create list to select Tobacco from database.

In [57]:
# CROP values that count as tobacco, kept in sorted order.
tobacco_sel = sorted(['Burley Tobacco', 'Dark Tobacco'])
print(tobacco_sel)

['Burley Tobacco', 'Dark Tobacco']


#### Select Tobacco from dataset.

In [58]:
# Keep only the rows whose CROP is in the tobacco list, then take an
# independent copy of the five columns used by the later cells.
df_tobacco = df.loc[df['CROP'].isin(tobacco_sel)]
df_tobacco_nu = df_tobacco[['FIPS_NO', 'COUNTY', 'YEAR', 'P', 'K']].copy()
print(df_tobacco_nu.head())

   FIPS_NO COUNTY  YEAR      P      K
24       1  ADAIR  1990  282.0  298.0
25       1  ADAIR  1990  206.0  611.0
26       1  ADAIR  1990   71.0  120.0
27       1  ADAIR  1990  124.0  320.0
28       1  ADAIR  1990  300.0  283.0


### Calculate median for each year by County.

In [59]:
# Per-county, per-year median and row count (len) of P and K for tobacco.
# Empty cells are filled with 0 and the result is rounded to whole numbers.
df_tobacco_median = np.round(
    df_tobacco_nu.pivot_table(
        values=['P', 'K'],
        index='COUNTY',
        columns=['YEAR'],
        aggfunc=(np.median, len),
        fill_value=0,
    ),
    0,
)
print(df_tobacco_median)
print(df_tobacco_median.columns)

# Flatten the multi-level columns into underscore-joined names, then shorten.
df_tobacco_median.columns = ["_".join(levels) for levels in df_tobacco_median.columns]
df_tobacco_median.columns = (
    df_tobacco_median.columns
    .str.replace("P_median_", "P_med")
    .str.replace("P_len", "P_count")
    .str.replace("K_median_", "K_med")
    .str.replace("K_len", "K_count")
)
print(df_tobacco_median.columns)

# COUNTY becomes an ordinary column so it is written out with index=False.
df_tobacco_median = df_tobacco_median.reset_index()
print(df_tobacco_median)
file_out_median = fileOut / 'tobacco_median.csv'  # path and filename
df_tobacco_median.to_csv(file_out_median, index=False)  # output to csv
print('total number of records written to CSV:', f'{len(df_tobacco_median):,}', '\n')

            K                                               ...      P         \
          len                                               ... median          
YEAR     1990 1991 1992 1993 1994 1995 1996 1997 1998 1999  ...   2010   2011   
COUNTY                                                      ...                 
ADAIR      69   86  100   89   53   80  111  116   79  103  ...  139.0  156.0   
ALLEN      80   90   82   69   43   22   40   25   27   10  ...  151.0   72.0   
ANDERSON  122  137  116   84   71   56   73   68   88   52  ...  355.0  284.0   
BALLARD    43   51   80   58   36   49   60   69   84   97  ...  145.0    0.0   
BARREN    167  176  291  291  187  147  111   92   45   37  ...  144.0   60.0   
...       ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...    ...    ...   
WAYNE     102   98  119   95   62   87   73  158  100   92  ...  126.0  135.0   
WEBSTER     4   16   16    6   10   15   12   10   12   16  ...   82.0   85.0   
WHITLEY    37   63   75   39

#### Tobacco, Set categories for P and K values to very low, low, medium, high, very high. Base values from AGR-1.
Combined Burley and Dark Tobacco K into Burley recommendations for K categories.

#### Categories for P
    Cat      Title       Break
    -------------------------------------
    VL       very low    P< 7
    L        low         P>= 7 & P<=28
    M        medium      P>28 & P<=57
    H        high        P>57 & P<=79
    VH       very high   P> 80
    

#### Categories for K
    Cat      Title      Break
   --------------------------------------
    VL       very low   K< 96
    L        low        K>=96 & K <=205
    M        medium     K>205 & K <=303
    H        high       K>303 & K <=449
    VH       very high  K> 450

In [60]:
# Label every tobacco sample with its soil-test category for P and K
# (VL / L / M / H / VH), using the break points from the tables above this
# cell.  np.select returns the choice of the first condition that is True,
# so each band is written as a cascade of upper bounds and the bands are
# contiguous.  Rows for which every comparison is False (e.g. a missing P or
# K) keep the default empty-string label.
#
# NOTE(review): the tables give H as P<=79 / K<=449 and VH as P>80 / K>450,
# which leaves P == 80, K == 450 and any fractional value in between in no
# band at all.  Those samples are assigned to 'VH' here (everything above
# the High band); confirm against AGR-1 whether 80 / 450 belong to H or VH.
df_tobacco_nu['CAT_P'] = np.select(
    [
        df_tobacco_nu.P < 7,
        df_tobacco_nu.P <= 28,
        df_tobacco_nu.P <= 57,
        df_tobacco_nu.P <= 79,
        df_tobacco_nu.P > 79,
    ],
    ['VL', 'L', 'M', 'H', 'VH'],
    default='',
)
df_tobacco_nu['CAT_K'] = np.select(
    [
        df_tobacco_nu.K < 96,
        df_tobacco_nu.K <= 205,
        df_tobacco_nu.K <= 303,
        df_tobacco_nu.K <= 449,
        df_tobacco_nu.K > 449,
    ],
    ['VL', 'L', 'M', 'H', 'VH'],
    default='',
)


In [61]:
# Install a global "ignore" filter so later warnings are not displayed.
warnings.filterwarnings("ignore")

# County rows by (YEAR, category) columns: median and row count (len) of the
# nutrient value, empty cells filled with 0, rounded to 2 decimals.
df_tobacco_p = np.round(
    df_tobacco_nu.pivot_table(
        values=['P'],
        index='COUNTY',
        columns=['YEAR', 'CAT_P'],
        aggfunc=(np.median, len),
        fill_value=0,
    ),
    2,
)
df_tobacco_k = np.round(
    df_tobacco_nu.pivot_table(
        values=['K'],
        index='COUNTY',
        columns=['YEAR', 'CAT_K'],
        aggfunc=(np.median, len),
        fill_value=0,
    ),
    2,
)

# Flatten the multi-level columns into underscore-joined names, then shorten.
df_tobacco_p.columns = ["_".join(levels) for levels in df_tobacco_p.columns]
df_tobacco_k.columns = ["_".join(levels) for levels in df_tobacco_k.columns]
df_tobacco_p.columns = (
    df_tobacco_p.columns
    .str.replace("P_median_", "P_")
    .str.replace("P_len", "P_count")
)
df_tobacco_k.columns = (
    df_tobacco_k.columns
    .str.replace("K_median_", "K_")
    .str.replace("K_len", "K_count")
)

# COUNTY becomes an ordinary column so it can serve as the merge key.
df_tobacco_p = df_tobacco_p.reset_index()
df_tobacco_k = df_tobacco_k.reset_index()

# Put the P and K tables side by side, one row per county.
tobacco_level = df_tobacco_p.merge(df_tobacco_k, on='COUNTY')

file_out_level = fileOut / 'tobacco_levels.csv'
tobacco_level.to_csv(file_out_level, index=False)
print('total number of records written to CSV:', f'{len(tobacco_level):,}', '\n')


total number of records written to CSV: 120 



## Warm Season Grass

#### Create list to select Warm Season Grass from database.

In [62]:
# CROP values that count as warm season grass, kept in sorted order.
warmseason_sel = sorted([
    'Bermudagrass',
    'Bermudagrass, common',
    'Bermudagrass, improved',
    'Bluestem',
    'Indiangrass',
    'Millet',
    'Sorghum Sudangrass',
    'Sorghum/Sudangras',
    'Sudangrass',
    'Switchgrass',
    'Warm Season Annual Grass',
    'Warm Season Grass',
    'Warm Season Native Grass',
    'Zoyiagrass',
    'Zoysiagrass',
])
print(warmseason_sel)

['Bermudagrass', 'Bermudagrass, common', 'Bermudagrass, improved', 'Bluestem', 'Indiangrass', 'Millet', 'Sorghum Sudangrass', 'Sorghum/Sudangras', 'Sudangrass', 'Switchgrass', 'Warm Season Annual Grass', 'Warm Season Grass', 'Warm Season Native Grass', 'Zoyiagrass', 'Zoysiagrass']


#### Select Warm Season Grass from dataset.

In [63]:
 # Keep only the rows whose CROP is in the warm-season list, then take an
 # independent copy of the five columns used by the later cells.
 df_warmseason = df.loc[df['CROP'].isin(warmseason_sel)]
 df_warmseason_nu = df_warmseason[['FIPS_NO', 'COUNTY', 'YEAR', 'P', 'K']].copy()
 print(df_warmseason_nu.head())

     FIPS_NO COUNTY  YEAR     P      K
968        1  ADAIR  1992  89.0  194.0
1564       1  ADAIR  1994  93.0  153.0
1887       1  ADAIR  1995  26.0  178.0
2266       1  ADAIR  1996  58.0  129.0
2606       1  ADAIR  1997  94.0  214.0


### Calculate median by year for each County.

In [64]:
# Per-county, per-year median and row count (len) of P and K for warm season
# grass.  Empty cells are filled with 0 and the result is rounded to whole
# numbers.
df_warmseason_median = np.round(
    df_warmseason_nu.pivot_table(
        values=['P', 'K'],
        index='COUNTY',
        columns=['YEAR'],
        aggfunc=(np.median, len),
        fill_value=0,
    ),
    0,
)
print(df_warmseason_median)
print(df_warmseason_median.columns)

# Flatten the multi-level columns into underscore-joined names, then shorten.
df_warmseason_median.columns = ["_".join(levels) for levels in df_warmseason_median.columns]
df_warmseason_median.columns = (
    df_warmseason_median.columns
    .str.replace("P_median_", "P_med")
    .str.replace("P_len", "P_count")
    .str.replace("K_median_", "K_med")
    .str.replace("K_len", "K_count")
)
print(df_warmseason_median.columns)

# COUNTY becomes an ordinary column so it is written out with index=False.
df_warmseason_median = df_warmseason_median.reset_index()
print(df_warmseason_median)
file_out_median = fileOut / 'warmseason_median.csv'  # path and filename
df_warmseason_median.to_csv(file_out_median, index=False)  # output to csv
print('total number of records written to CSV:', f'{len(df_warmseason_median):,}', '\n')

            K                                               ...      P  \
          len                                               ... median   
YEAR     1990 1991 1992 1993 1994 1995 1996 1997 1998 1999  ...   2010   
COUNTY                                                      ...          
ADAIR       0    0    1    0    1    1    1    1    2    4  ...    0.0   
ALLEN       3    1    0    0    1    0    0    0    2    2  ...   89.0   
ANDERSON    0    0    0    0    0    0    0    3    0    0  ...  128.0   
BALLARD     1    0    0    1    2    1    0    0    1    2  ...   34.0   
BARREN      3    2    3   12    2    5    4    0    1    0  ...   64.0   
...       ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...    ...   
WAYNE       0    0    0    1    0    0    0    0    1    0  ...  224.0   
WEBSTER     0    1    0    0    0    0    0    0   20   19  ...   19.0   
WHITLEY     0    0    0    0    0    0    0    0    1    1  ...    0.0   
WOLFE       0    0    0    0    0    0

#### Warm Season Grass, Set categories for P and K values to very low, low, medium, high, very high. Base values from AGR-1.

#### Categories for P
    Cat      Title       Break
    -------------------------------------
    VL       very low    P< 10
    L        low         P>= 10 & P<=30
    M        medium      P>30 & P<=60
    H        high        P>60
    
#### Categories for K
    Cat      Title      Break
   --------------------------------------
    VL       very low   K< 100
    L        low        K>=100 & K <=204
    M        medium     K>=205 & K <=300
    H        high       K>300

In [65]:
# Label every warm-season-grass sample with its soil-test category for P and
# K (VL / L / M / H), using the break points from the tables above this cell.
# np.select returns the choice of the first condition that is True, so each
# band is written as a cascade of upper bounds.  That keeps the bands
# contiguous: P == 10 lands in 'L', as the table specifies (P>=10).
# Rows for which every comparison is False (e.g. a missing P or K) keep the
# default empty-string label.
df_warmseason_nu['CAT_P'] = np.select(
    [
        df_warmseason_nu.P < 10,
        df_warmseason_nu.P <= 30,
        df_warmseason_nu.P <= 60,
        df_warmseason_nu.P > 60,
    ],
    ['VL', 'L', 'M', 'H'],
    default='',
)

df_warmseason_nu['CAT_K'] = np.select(
    [
        df_warmseason_nu.K < 100,
        df_warmseason_nu.K <= 204,
        df_warmseason_nu.K <= 300,
        df_warmseason_nu.K > 300,
    ],
    ['VL', 'L', 'M', 'H'],
    default='',
)

In [66]:
# Install a global "ignore" filter so later warnings are not displayed.
warnings.filterwarnings("ignore")

# County rows by (YEAR, category) columns: median and row count (len) of the
# nutrient value, empty cells filled with 0, rounded to 2 decimals.
df_warmseason_p = np.round(
    df_warmseason_nu.pivot_table(
        values=['P'],
        index='COUNTY',
        columns=['YEAR', 'CAT_P'],
        aggfunc=(np.median, len),
        fill_value=0,
    ),
    2,
)
df_warmseason_k = np.round(
    df_warmseason_nu.pivot_table(
        values=['K'],
        index='COUNTY',
        columns=['YEAR', 'CAT_K'],
        aggfunc=(np.median, len),
        fill_value=0,
    ),
    2,
)

# Flatten the multi-level columns into underscore-joined names, then shorten.
df_warmseason_p.columns = ["_".join(levels) for levels in df_warmseason_p.columns]
df_warmseason_k.columns = ["_".join(levels) for levels in df_warmseason_k.columns]
df_warmseason_p.columns = (
    df_warmseason_p.columns
    .str.replace("P_median_", "P_")
    .str.replace("P_len", "P_count")
)
df_warmseason_k.columns = (
    df_warmseason_k.columns
    .str.replace("K_median_", "K_")
    .str.replace("K_len", "K_count")
)

# COUNTY becomes an ordinary column so it can serve as the merge key.
df_warmseason_p = df_warmseason_p.reset_index()
df_warmseason_k = df_warmseason_k.reset_index()

# Put the P and K tables side by side, one row per county.
warmseason_level = df_warmseason_p.merge(df_warmseason_k, on='COUNTY')

file_out_level = fileOut / 'warmseason_levels.csv'
warmseason_level.to_csv(file_out_level, index=False)
print('total number of records written to CSV:', f'{len(warmseason_level):,}', '\n')



total number of records written to CSV: 116 



## Cool Season Grass

#### Create list to select Cool Season Grass from database.

In [67]:
# CROP values that count as cool season grass, kept in sorted order.
# NOTE(review): 'Millet', 'Sorghum Sudangrass', 'Sorghum/Sudangras' and
# 'Switchgrass' are also listed in warmseason_sel, so those samples are
# counted in both groups -- confirm this overlap is intended.
coolseason_sel = sorted([
    'Bluegrass',
    'Cool Season Grass',
    'Fescue',
    'Fescue/Lespedeza',
    'Fescue/Lespedeza (multiple)',
    'Fine Fescue',
    'Lespedeza',
    'Lespedeza/Grass',
    'Millet',
    'Orchardgrass',
    'Perennial Ryegrass',
    'Sorghum Sudangrass',
    'Sorghum/Sudangras',
    'Switchgrass',
    'Tall Fescue',
    'Timothy',
])
print(coolseason_sel)

['Bluegrass', 'Cool Season Grass', 'Fescue', 'Fescue/Lespedeza', 'Fescue/Lespedeza (multiple)', 'Fine Fescue', 'Lespedeza', 'Lespedeza/Grass', 'Millet', 'Orchardgrass', 'Perennial Ryegrass', 'Sorghum Sudangrass', 'Sorghum/Sudangras', 'Switchgrass', 'Tall Fescue', 'Timothy']


#### Select Cool Season Grass from dataset.

In [68]:
# Keep only the rows whose CROP is in the cool-season list, then take an
# independent copy of the five columns used by the later cells.
df_coolseason = df.loc[df['CROP'].isin(coolseason_sel)]
df_coolseason_nu = df_coolseason[['FIPS_NO', 'COUNTY', 'YEAR', 'P', 'K']].copy()
print(df_coolseason_nu.head())

    FIPS_NO COUNTY  YEAR      P      K
187       1  ADAIR  1990   28.0  108.0
188       1  ADAIR  1990   88.0  408.0
189       1  ADAIR  1990   30.0  533.0
190       1  ADAIR  1990   66.0  384.0
191       1  ADAIR  1990  140.0  767.0


### Calculate median by the year for each County.

In [69]:
# Per-county, per-year median and row count (len) of P and K for cool season
# grass.  Empty cells are filled with 0 and the result is rounded to whole
# numbers.
df_coolseason_median = np.round(
    df_coolseason_nu.pivot_table(
        values=['P', 'K'],
        index='COUNTY',
        columns=['YEAR'],
        aggfunc=(np.median, len),
        fill_value=0,
    ),
    0,
)
print(df_coolseason_median)
print(df_coolseason_median.columns)

# Flatten the multi-level columns into underscore-joined names, then shorten.
df_coolseason_median.columns = ["_".join(levels) for levels in df_coolseason_median.columns]
df_coolseason_median.columns = (
    df_coolseason_median.columns
    .str.replace("P_median_", "P_med")
    .str.replace("P_len", "P_count")
    .str.replace("K_median_", "K_med")
    .str.replace("K_len", "K_count")
)
print(df_coolseason_median.columns)

# COUNTY becomes an ordinary column so it is written out with index=False.
df_coolseason_median = df_coolseason_median.reset_index()
print(df_coolseason_median)
file_out_median = fileOut / 'coolseason_median.csv'  # path and filename
df_coolseason_median.to_csv(file_out_median, index=False)  # output to csv
print('total number of records written to CSV:', f'{len(df_coolseason_median):,}', '\n')

            K                                               ...      P         \
          len                                               ... median          
YEAR     1990 1991 1992 1993 1994 1995 1996 1997 1998 1999  ...   2010   2011   
COUNTY                                                      ...                 
ADAIR      37   57   43   43   51   49   96   57   75   52  ...   76.0   56.0   
ALLEN     135  250  201  272  276  154  130  156  119  133  ...   49.0   44.0   
ANDERSON   36    7   12   15   16   25   22   22    9   10  ...  130.0   27.0   
BALLARD    10   10   38   12   11   17   17   10   12   12  ...   66.0   22.0   
BARREN     82  106  149  201  242  200   90   56   31   39  ...   73.0   70.0   
...       ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...    ...    ...   
WAYNE      55   40   32   52   64   83   77   76   64   76  ...   68.0   69.0   
WEBSTER     6   15    6    3    2   15    1   11    5   66  ...   32.0   34.0   
WHITLEY     3    3    4    1

#### Cool Season Grass, Set categories for P and K values to very low, low, medium, high, very high. Base values from AGR-1.

#### Categories for P
    Cat      Title       Break
    -------------------------------------
    VL       very low    P< 10
    L        low         P>= 10 & P<=30
    M        medium      P>30 & P<=60
    H        high        P>60
    
#### Categories for K
    Cat      Title      Break
   --------------------------------------
    VL       very low   K< 104
    L        low        K>=104 & K <=186
    M        medium     K>=187 & K <=300
    H        high       K>300

In [70]:
# Label every cool-season-grass sample with its soil-test category for P and
# K (VL / L / M / H), using the break points from the tables above this cell.
# np.select returns the choice of the first condition that is True, so each
# band is written as a cascade of upper bounds.  That keeps the bands
# contiguous: P == 10 lands in 'L', as the table specifies (P>=10).
# Rows for which every comparison is False (e.g. a missing P or K) keep the
# default empty-string label.
df_coolseason_nu['CAT_P'] = np.select(
    [
        df_coolseason_nu.P < 10,
        df_coolseason_nu.P <= 30,
        df_coolseason_nu.P <= 60,
        df_coolseason_nu.P > 60,
    ],
    ['VL', 'L', 'M', 'H'],
    default='',
)

df_coolseason_nu['CAT_K'] = np.select(
    [
        df_coolseason_nu.K < 104,
        df_coolseason_nu.K <= 186,
        df_coolseason_nu.K <= 300,
        df_coolseason_nu.K > 300,
    ],
    ['VL', 'L', 'M', 'H'],
    default='',
)

In [71]:
# Install a global "ignore" filter so later warnings are not displayed.
warnings.filterwarnings("ignore")

# County rows by (YEAR, category) columns: median and row count (len) of the
# nutrient value, empty cells filled with 0, rounded to 2 decimals.
df_coolseason_p = np.round(
    df_coolseason_nu.pivot_table(
        values=['P'],
        index='COUNTY',
        columns=['YEAR', 'CAT_P'],
        aggfunc=(np.median, len),
        fill_value=0,
    ),
    2,
)
df_coolseason_k = np.round(
    df_coolseason_nu.pivot_table(
        values=['K'],
        index='COUNTY',
        columns=['YEAR', 'CAT_K'],
        aggfunc=(np.median, len),
        fill_value=0,
    ),
    2,
)

# Flatten the multi-level columns into underscore-joined names, then shorten.
df_coolseason_p.columns = ["_".join(levels) for levels in df_coolseason_p.columns]
df_coolseason_k.columns = ["_".join(levels) for levels in df_coolseason_k.columns]
df_coolseason_p.columns = (
    df_coolseason_p.columns
    .str.replace("P_median_", "P_")
    .str.replace("P_len", "P_count")
)
df_coolseason_k.columns = (
    df_coolseason_k.columns
    .str.replace("K_median_", "K_")
    .str.replace("K_len", "K_count")
)

# COUNTY becomes an ordinary column so it can serve as the merge key.
df_coolseason_p = df_coolseason_p.reset_index()
df_coolseason_k = df_coolseason_k.reset_index()

# Put the P and K tables side by side, one row per county.
coolseason_level = df_coolseason_p.merge(df_coolseason_k, on='COUNTY')

file_out_level = fileOut / 'coolseason_levels.csv'
coolseason_level.to_csv(file_out_level, index=False)
print('total number of records written to CSV:', f'{len(coolseason_level):,}', '\n')

total number of records written to CSV: 120 



## Alfalfa Clover mix

#### Create list to select Alfalfa Clover mix from database.

In [72]:
# CROP values that count as the alfalfa/clover mix, kept in sorted order.
alfalfa_sel = sorted([
    'Alfalfa',
    'Alfalfa/Cool Season',
    'Bluegrass/White Clover',
    'Clover/Grass',
    'Fescue/White Clover',
    'Orchardgrass/Red Clover',
    'Orchardgrass/White Clover',
    'Red Clover',
    'Red Clover/Grass',
    'Timothy/Red Clover',
    'White Clover',
    'White Clover/Grass',
])
print(alfalfa_sel)

['Alfalfa', 'Alfalfa/Cool Season', 'Bluegrass/White Clover', 'Clover/Grass', 'Fescue/White Clover', 'Orchardgrass/Red Clover', 'Orchardgrass/White Clover', 'Red Clover', 'Red Clover/Grass', 'Timothy/Red Clover', 'White Clover', 'White Clover/Grass']


#### Select Alfalfa Clover mix from dataset.

In [73]:
# Keep only the rows whose CROP is in the alfalfa/clover list, then take an
# independent copy of the five columns used by the later cells.
df_alfalfa = df.loc[df['CROP'].isin(alfalfa_sel)]
df_alfalfa_nu = df_alfalfa[['FIPS_NO', 'COUNTY', 'YEAR', 'P', 'K']].copy()
print(df_alfalfa_nu.head())

  FIPS_NO COUNTY  YEAR      P      K
0       1  ADAIR  1990   28.0  158.0
1       1  ADAIR  1990   88.0  134.0
2       1  ADAIR  1990   70.0  256.0
3       1  ADAIR  1990  161.0  611.0
4       1  ADAIR  1990  105.0  315.0


### Calculate median P and K, and sample counts, by year for each County.

In [74]:
# Pivot by county with one column per year, holding the median and the
# number of rows for both P and K; missing combinations are filled with 0
# and values are rounded to whole numbers.
df_alfalfa_median = df_alfalfa_nu.pivot_table(
    index='COUNTY',
    columns=['YEAR'],
    values=['P', 'K'],
    aggfunc=(np.median, len),
    fill_value=0,
).round(0)
print(df_alfalfa_median)
print(df_alfalfa_median.columns)

# Flatten the multi-level column labels into underscore-joined strings and
# shorten them: "P_median_<year>" -> "P_med<year>", "P_len_<year>" ->
# "P_count_<year>" (same for K).
df_alfalfa_median.columns = ['_'.join(levels) for levels in df_alfalfa_median.columns]
df_alfalfa_median.columns = (
    df_alfalfa_median.columns
    .str.replace("P_median_", "P_med")
    .str.replace("P_len", "P_count")
    .str.replace("K_median_", "K_med")
    .str.replace("K_len", "K_count")
)
print(df_alfalfa_median.columns)

# Move COUNTY from the index back into a regular column before exporting.
df_alfalfa_median = df_alfalfa_median.reset_index()
print(df_alfalfa_median)

file_out_median = fileOut / 'alfalfa_median.csv'  # path and filename
df_alfalfa_median.to_csv(file_out_median, index=False)  # output to csv
print('total number of records written to CSV:', '{:,}'.format(len(df_alfalfa_median)), '\n')

            K                                               ...      P         \
          len                                               ... median          
YEAR     1990 1991 1992 1993 1994 1995 1996 1997 1998 1999  ...   2010   2011   
COUNTY                                                      ...                 
ADAIR     209  130  130  145   67  115   90   75   66   49  ...   56.0   70.0   
ALLEN     115  112  119  116   72   57   23   54   20   34  ...  102.0   56.0   
ANDERSON   31   20   22   35   27   22   31   17    6    8  ...   93.0  128.0   
BALLARD    38   43   36   54   22   43   33   18   17   23  ...   54.0   28.0   
BARREN    337  262  498  474  299  287  146   54   63   42  ...   46.0   56.0   
...       ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...    ...    ...   
WAYNE      75   79  101   85   68   60   51   34   39   30  ...   74.0   42.0   
WEBSTER    46   76   74   45   18   10   21   24   11   16  ...   26.0   23.0   
WHITLEY   145  114  154  164

#### Alfalfa Clover mix: set categories for P and K values to very low, low, medium, or high. Base values from AGR-1.

#### Categories for P
    Cat      Title       Break
    -------------------------------------
    VL       very low    P< 9
    L        low         P>= 9 & P<=27
    M        medium      P>27 & P<=60
    H        high        P>60
    
#### Categories for K
    Cat      Title      Break
    --------------------------------------
    VL       very low   K< 97
    L        low        K>=97 & K <=203
    M        medium     K>203 & K <=296
    H        high       K>296

In [75]:
# Label every sample with its P category. The conditions are mutually
# exclusive; rows matching none of them (e.g. a missing P) get ''.
df_alfalfa_nu['CAT_P'] = np.select(
    [
        df_alfalfa_nu['P'] < 9,                                   # very low
        df_alfalfa_nu['P'].between(9, 27),                        # low (both ends inclusive)
        (df_alfalfa_nu['P'] > 27) & (df_alfalfa_nu['P'] <= 60),   # medium
        df_alfalfa_nu['P'] > 60,                                  # high
    ],
    ['VL', 'L', 'M', 'H'],
    default='',
)

# Label every sample with its K category. The conditions are mutually
# exclusive; rows matching none of them (e.g. a missing K) get ''.
df_alfalfa_nu['CAT_K'] = np.select(
    [
        df_alfalfa_nu['K'] < 97,                                    # very low
        df_alfalfa_nu['K'].between(97, 203),                        # low (both ends inclusive)
        (df_alfalfa_nu['K'] > 203) & (df_alfalfa_nu['K'] <= 296),   # medium
        df_alfalfa_nu['K'] > 296,                                   # high
    ],
    ['VL', 'L', 'M', 'H'],
    default='',
)

In [77]:
# Silence all warnings from here on (process-wide filter).
warnings.filterwarnings("ignore")

# Pivot the alfalfa / clover samples by county: one column per
# (year, category) pair, holding the median value and the number of rows in
# that group. Combinations with no samples are filled with 0.
df_alfalfa_p = df_alfalfa_nu.pivot_table(
    index='COUNTY',
    columns=['YEAR', 'CAT_P'],
    values=['P'],
    aggfunc=(np.median, len),
    fill_value=0,
).round(2)
df_alfalfa_k = df_alfalfa_nu.pivot_table(
    index='COUNTY',
    columns=['YEAR', 'CAT_K'],
    values=['K'],
    aggfunc=(np.median, len),
    fill_value=0,
).round(2)

# Flatten the multi-level column labels into single underscore-joined
# strings, then shorten them: "P_median_<year>_<cat>" -> "P_<year>_<cat>"
# and "P_len_<year>_<cat>" -> "P_count_<year>_<cat>" (same for K).
df_alfalfa_p.columns = ['_'.join(levels) for levels in df_alfalfa_p.columns]
df_alfalfa_k.columns = ['_'.join(levels) for levels in df_alfalfa_k.columns]
df_alfalfa_p.columns = (
    df_alfalfa_p.columns
    .str.replace("P_median_", "P_")
    .str.replace("P_len", "P_count")
)
df_alfalfa_k.columns = (
    df_alfalfa_k.columns
    .str.replace("K_median_", "K_")
    .str.replace("K_len", "K_count")
)

# Turn COUNTY back into a regular column so it can serve as the join key.
df_alfalfa_p, df_alfalfa_k = (
    df_alfalfa_p.reset_index(),
    df_alfalfa_k.reset_index(),
)

# One row per county with the P columns followed by the K columns.
alfalfa_level = df_alfalfa_p.merge(df_alfalfa_k, on='COUNTY')

file_out_level = fileOut / 'alfalfa_levels.csv'
alfalfa_level.to_csv(file_out_level, index=False)
print('total number of records written to CSV:', '{:,}'.format(len(alfalfa_level)), '\n')


total number of records written to CSV: 120 

