In [1]:
import os
import sys

%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib as mpl

import seaborn as sns

import pandas as pd
import numpy as np

pd.set_option("display.max_columns", 101)
pd.set_option("display.float_format", lambda x: "%.2f" % x )

In [2]:
DATA_ROOT = os.path.abspath(os.path.join(os.getcwd(), os.pardir, "data"))
RAW = os.path.join(DATA_ROOT, "raw")

In [3]:
def print_md_summary(df):
    template = " - [{}](#{})"
    
    toc = [template.format(c, c.lower()) for c in df.columns]
    toc = "\n".join(toc)
    
    print(toc)
    
    c_template = """
#### `{}`

Contains

{}:

```
{}
```

"""
    
    for c in df.columns:
        if df[c].nunique() > 25:
            non_null_sample = df.loc[pd.notnull(df[c]), c]
            samp_str = "First 5 non-null values"
            samp = str(non_null_sample.head())
        else:
            samp_str = "Unique values in the column"
            samp = str(df[c].value_counts())
            
        print(c_template.format(c, samp_str, samp))

# User/Farmer Data

In [4]:
user_data = pd.read_csv(os.path.join(RAW, 'User Data/user_data.csv'), index_col=0)

tbl_farmer = pd.read_csv(os.path.join(RAW, 'User Data/tbl_farmer.csv'))
tbl_farmer = tbl_farmer[[c for c in tbl_farmer.columns if 'Unnamed:' not in c]]

In [5]:
tbl_farmer.head()

Unnamed: 0,date_created,phone_number,farmer_name,id_number,county,constituency,ward,tocode,language_preference,dob,gender,marital_status,dependants_number,sub_location
0,2016-05-27 07:58:42.747149+03,254711205984,,,,,,,sw,,2,1,,Woodley
1,2016-05-27 07:58:42.747149+03,254713729872,Angela De Michele,12345678.0,Mombasa,Changamwe,Kipevu,"Kipevu,Changamwe,Mombasa",en,1988.0,2,4,10.0,Kaptich
2,2016-05-27 07:58:42.747149+03,254726712505,,,,,,,sw,,2,1,,Woodley
3,2016-05-27 07:58:42.747149+03,254725102659,,,,,,,sw,,2,1,,Woodley
4,2016-05-27 07:58:42.747149+03,254716336478,,,,,,,sw,,2,1,,Woodley


In [10]:
print_md_summary(user_data)

 - [date_created](#date_created)
 - [phone_number](#phone_number)
 - [farmer_name](#farmer_name)
 - [id_number](#id_number)
 - [county](#county)
 - [constituency](#constituency)
 - [ward](#ward)
 - [tocode](#tocode)
 - [language_preference](#language_preference)
 - [dob](#dob)
 - [gender](#gender)
 - [marital_status](#marital_status)
 - [dependants_number](#dependants_number)
 - [sub_location](#sub_location)
 - [wards_coded](#wards_coded)

#### `date_created`

Contains

First 5 non-null values:

```
2     2016-05-27 07:58:42.747149+03
8     2016-05-30 13:52:03.299849+03
9      2016-05-31 13:26:33.19348+03
10    2016-06-09 08:59:46.489033+03
11    2016-02-16 11:25:41.941277+03
Name: date_created, dtype: object
```



#### `phone_number`

Contains

First 5 non-null values:

```
2    254714000000.00
8    254717000000.00
9    254728000000.00
10   254714000000.00
11   254728000000.00
Name: phone_number, dtype: float64
```



#### `farmer_name`

Contains

First 5 non-null values:

```
2     

# Crop Price Data Cleaning and Exploration

In [4]:
all_crop_prices = pd.read_csv(os.path.join(DATA_ROOT, "processed", "all_crop_prices.csv"),
                              index_col=0)
all_crop_prices.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,Bungoma,Busia,Chwele,Class,Code,Commodity,Eldoret,Embu,Gakorn,Garissa,Gem,Imenti,Imenti North,Imenti South,Isiolo,Kajiado,Kakamega,Kapsowar,Karatina,Kg,Kibwezi,Kisii,Kisumu,Kitale,Kitui,Loitokitok,Machakos,Malindi,Mandera,Marakwet,Marimanti,Mathira,Maua,Meru,Mombasa,Mwala,Mwingi,Nairobi,Nakuru,Nkubu,Nyahururu,Siaya,Tavata,Tharaka North,Tharaka South,Thika,Unit,VARIETY,Wajir,Yala,date
0,,,,,,Dry Maize,3000.0,,,,,,,,,,,,,90.0,,,3600.0,2500.0,,,,3420.0,,,,,,,3000.0,,,3200.0,,,,,,,,,Bag,CEREAL,,,2014-04-01 00:00:00
1,,,,,,Green Maize,1620.0,,,,,,,,,,,,,115.0,,,2400.0,2800.0,,,,3000.0,,,,,,,6200.0,,,3500.0,,,,,,,,,Ext Bag,,,,2014-04-01 00:00:00
2,,,,,,Finger Millet,6750.0,,,,,,,,,,,,,90.0,,,7200.0,5400.0,,,,6300.0,,,,,,,8100.0,,,6500.0,,,,,,,,,Bag,,,,2014-04-01 00:00:00
3,,,,,,Sorghum,4500.0,,,,,,,,,,,,,90.0,,,3600.0,3600.0,,,,5400.0,,,,,,,2700.0,,,3800.0,,,,,,,,,Bag,,,,2014-04-01 00:00:00
4,,,,,,Wheat,3300.0,,,,,,,,,,,,,90.0,,,,4500.0,,,,5400.0,,,,,,,,,,,,,,,,,,,Bag,,,,2014-04-01 00:00:00


In [5]:
print_md_summary(all_crop_prices)

 - [Bungoma](#bungoma)
 - [Busia](#busia)
 - [Chwele](#chwele)
 - [Class](#class)
 - [Code](#code)
 - [Commodity](#commodity)
 - [Eldoret](#eldoret)
 - [Embu](#embu)
 - [Gakorn](#gakorn)
 - [Garissa](#garissa)
 - [Gem](#gem)
 - [Imenti](#imenti)
 - [Imenti North](#imenti north)
 - [Imenti South](#imenti south)
 - [Isiolo](#isiolo)
 - [Kajiado](#kajiado)
 - [Kakamega](#kakamega)
 - [Kapsowar](#kapsowar)
 - [Karatina](#karatina)
 - [Kg](#kg)
 - [Kibwezi](#kibwezi)
 - [Kisii](#kisii)
 - [Kisumu](#kisumu)
 - [Kitale](#kitale)
 - [Kitui](#kitui)
 - [Loitokitok](#loitokitok)
 - [Machakos](#machakos)
 - [Malindi](#malindi)
 - [Mandera](#mandera)
 - [Marakwet](#marakwet)
 - [Marimanti](#marimanti)
 - [Mathira](#mathira)
 - [Maua](#maua)
 - [Meru](#meru)
 - [Mombasa](#mombasa)
 - [Mwala](#mwala)
 - [Mwingi](#mwingi)
 - [Nairobi](#nairobi)
 - [Nakuru](#nakuru)
 - [Nkubu](#nkubu)
 - [Nyahururu](#nyahururu)
 - [Siaya](#siaya)
 - [Tavata](#tavata)
 - [Tharaka North](#tharaka north)
 - [Tharaka Sout