# Kiva Data Science For Good Competition
Prepared by Hansel Wei

# Setup

In [None]:
# Check/Install Missing Package Requirements
!pip install pandas numpy seaborn sklearn tensorflow

In [60]:
import os
import pandas as pd
from six.moves import urllib

# URL prefix of the S3 location hosting the CSVs; file names are appended verbatim.
DOWNLOAD_ROOT = 'https://s3.amazonaws.com/kiva-datasets/'

# Local destination prefix. Paths are built by plain string concatenation,
# so the trailing slash is significant.
SAVE_PATH = './datasets/'

# Files to download/load, in the order load_kiva_datasets() returns them.
FILE_NAMES = (
    'kiva_loans.csv',
    'kiva_mpi_region_locations.csv',
    'loan_theme_ids.csv',
    'loan_themes_by_region.csv',
)

def fetch_datasets(download_root=DOWNLOAD_ROOT, file_names=FILE_NAMES, save_path=SAVE_PATH):
    """Download each dataset file unless a local copy is already present.

    Args:
        download_root: URL prefix; each file name is appended to it verbatim.
        file_names: Sequence of file names to download.
        save_path: Destination prefix. File names are appended by plain string
            concatenation, so it should end with a path separator. The
            directory is created if it does not exist.

    Returns:
        None. Progress is reported with print().
    """
    if not os.path.isdir(save_path):
        os.makedirs(save_path)

    total = len(file_names)
    for position, file_name in enumerate(file_names, start=1):
        url = download_root + file_name
        target = save_path + file_name
        print("get (%d/%d): %s" % (position, total, url))
        # The target is a regular file, so test for existence; isdir() is
        # never true for a downloaded CSV and forced a re-download every call.
        if not os.path.exists(target):
            urllib.request.urlretrieve(url, target)
            print("saved (%d/%d): %s" % (position, total, url))
        else:
            print("File name exists! Not updated. (%d/%d): %s" % (position, total, url))
            
def update_datasets(download_root=DOWNLOAD_ROOT, file_names=FILE_NAMES, save_path=SAVE_PATH):
    """Download every dataset file, whether or not a local copy exists.

    Args:
        download_root: URL prefix; each file name is appended to it verbatim.
        file_names: Sequence of file names to download.
        save_path: Destination prefix. File names are appended by plain string
            concatenation, so it should end with a path separator. The
            directory is created if it does not exist.

    Returns:
        None. Progress is reported with print().
    """
    if not os.path.isdir(save_path):
        os.makedirs(save_path)

    # Iterate over file_names; the previous loop bound referenced an
    # undefined name (`data`) and raised NameError on a fresh kernel.
    total = len(file_names)
    for position, file_name in enumerate(file_names, start=1):
        url = download_root + file_name
        print("get (%d/%d): %s" % (position, total, url))
        urllib.request.urlretrieve(url, save_path + file_name)
        # Report success only after the download call has returned.
        print("saved (%d/%d): %s" % (position, total, url))

def load_kiva_datasets(save_path=SAVE_PATH, file_names=FILE_NAMES):
    """Read each CSV under ``save_path`` and return the frames as a tuple.

    Args:
        save_path: Prefix prepended to every file name by string concatenation.
        file_names: Iterable of CSV file names to read.

    Returns:
        A tuple of DataFrames, one per entry of ``file_names``, in the same order.
    """
    def _read_and_report(csv_name):
        # Announce the file only after it has been read successfully.
        frame = pd.read_csv(save_path + csv_name)
        print('loaded: %s' % csv_name)
        return frame

    return tuple(_read_and_report(csv_name) for csv_name in file_names)

def print_labels(df):
    """Print every column label of ``df`` preceded by its positional index.

    Args:
        df: Object exposing an iterable ``columns`` attribute.

    Returns:
        None. One "<index> <label>" line is printed per column.
    """
    position = 0
    for column_label in df.columns:
        print(position, column_label)
        position += 1

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
# Larger default font sizes for axis labels and tick labels on every figure.
plt.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})

# Get the Data

In [21]:
fetch_datasets()

get (1/4): https://s3.amazonaws.com/kiva-datasets/kiva_loans.csv
saved (1/4): https://s3.amazonaws.com/kiva-datasets/kiva_loans.csv
get (2/4): https://s3.amazonaws.com/kiva-datasets/kiva_mpi_region_locations.csv
saved (2/4): https://s3.amazonaws.com/kiva-datasets/kiva_mpi_region_locations.csv
get (3/4): https://s3.amazonaws.com/kiva-datasets/loan_theme_ids.csv
saved (3/4): https://s3.amazonaws.com/kiva-datasets/loan_theme_ids.csv
get (4/4): https://s3.amazonaws.com/kiva-datasets/loan_themes_by_region.csv
saved (4/4): https://s3.amazonaws.com/kiva-datasets/loan_themes_by_region.csv


In [13]:
loans, mpi, theme, region = load_kiva_datasets()

loaded: kiva_loans.csv
loaded: kiva_mpi_region_locations.csv
loaded: loan_theme_ids.csv
loaded: loan_themes_by_region.csv


# Exploring the Data

#### kiva_loans.csv

In [72]:
loans.head(3)

Unnamed: 0,id,funded_amount,loan_amount,activity,sector,use,country_code,country,region,currency,partner_id,posted_time,disbursed_time,funded_time,term_in_months,lender_count,tags,borrower_genders,repayment_interval,date
0,653051,300.0,300.0,Fruits & Vegetables,Food,"To buy seasonal, fresh fruits to sell.",PK,Pakistan,Lahore,PKR,247.0,2014-01-01 06:12:39+00:00,2013-12-17 08:00:00+00:00,2014-01-02 10:06:32+00:00,12.0,12,,female,irregular,2014-01-01
1,653053,575.0,575.0,Rickshaw,Transportation,to repair and maintain the auto rickshaw used ...,PK,Pakistan,Lahore,PKR,247.0,2014-01-01 06:51:08+00:00,2013-12-17 08:00:00+00:00,2014-01-02 09:17:23+00:00,11.0,14,,"female, female",irregular,2014-01-01
2,653068,150.0,150.0,Transportation,Transportation,To repair their old cycle-van and buy another ...,IN,India,Maynaguri,INR,334.0,2014-01-01 09:58:07+00:00,2013-12-17 08:00:00+00:00,2014-01-01 16:01:36+00:00,43.0,6,"user_favorite, user_favorite",female,bullet,2014-01-01


In [65]:
print_labels(loans)

0 id
1 funded_amount
2 loan_amount
3 activity
4 sector
5 use
6 country_code
7 country
8 region
9 currency
10 partner_id
11 posted_time
12 disbursed_time
13 funded_time
14 term_in_months
15 lender_count
16 tags
17 borrower_genders
18 repayment_interval
19 date


In [71]:
mpi.head(3)

Unnamed: 0,LocationName,ISO,country,region,world_region,MPI,geo,lat,lon
0,"Badakhshan, Afghanistan",AFG,Afghanistan,Badakhshan,South Asia,0.387,"(36.7347725, 70.81199529999999)",36.734772,70.811995
1,"Badghis, Afghanistan",AFG,Afghanistan,Badghis,South Asia,0.466,"(35.1671339, 63.7695384)",35.167134,63.769538
2,"Baghlan, Afghanistan",AFG,Afghanistan,Baghlan,South Asia,0.3,"(35.8042947, 69.2877535)",35.804295,69.287754


In [66]:
print_labels(mpi)

0 LocationName
1 ISO
2 country
3 region
4 world_region
5 MPI
6 geo
7 lat
8 lon


In [73]:
print_labels(theme)

0 id
1 Loan Theme ID
2 Loan Theme Type
3 Partner ID


In [75]:
theme.head(3)

Unnamed: 0,id,Loan Theme ID,Loan Theme Type,Partner ID
0,638631,a1050000000skGl,General,151.0
1,640322,a1050000000skGl,General,151.0
2,641006,a1050000002X1ij,Higher Education,160.0


In [74]:
print_labels(region)

0 Partner ID
1 Field Partner Name
2 sector
3 Loan Theme ID
4 Loan Theme Type
5 country
6 forkiva
7 region
8 geocode_old
9 ISO
10 number
11 amount
12 LocationName
13 geocode
14 names
15 geo
16 lat
17 lon
18 mpi_region
19 mpi_geo
20 rural_pct


In [76]:
region.head(3)

Unnamed: 0,Partner ID,Field Partner Name,sector,Loan Theme ID,Loan Theme Type,country,forkiva,region,geocode_old,ISO,...,amount,LocationName,geocode,names,geo,lat,lon,mpi_region,mpi_geo,rural_pct
0,9,KREDIT Microfinance Institution,General Financial Inclusion,a1050000000slfi,Higher Education,Cambodia,No,Banteay Meanchey,"(13.75, 103.0)",KHM,...,450,"Banteay Meanchey, Cambodia","[(13.6672596, 102.8975098)]",Banteay Meanchey Province; Cambodia,"(13.6672596, 102.8975098)",13.66726,102.89751,"Banteay Mean Chey, Cambodia","(13.6672596, 102.8975098)",90.0
1,9,KREDIT Microfinance Institution,General Financial Inclusion,a10500000068jPe,Vulnerable Populations,Cambodia,No,Battambang Province,,KHM,...,20275,"Battambang Province, Cambodia","[(13.0286971, 102.989615)]",Battambang Province; Cambodia,"(13.0286971, 102.989615)",13.028697,102.989615,"Banteay Mean Chey, Cambodia","(13.6672596, 102.8975098)",90.0
2,9,KREDIT Microfinance Institution,General Financial Inclusion,a1050000000slfi,Higher Education,Cambodia,No,Battambang Province,,KHM,...,9150,"Battambang Province, Cambodia","[(13.0286971, 102.989615)]",Battambang Province; Cambodia,"(13.0286971, 102.989615)",13.028697,102.989615,"Banteay Mean Chey, Cambodia","(13.6672596, 102.8975098)",90.0
