# Fetch data from kiva.org

Take advantages of the kiva's public API

In [2]:
# as always, install some packages
%pip install -q gql[all] pandas country-converter plotly

In [1]:
from pprint import pprint
import pandas as pd
from gql import gql, Client
from gql.transport.requests import RequestsHTTPTransport

transport = RequestsHTTPTransport(url="https://api.kivaws.org/graphql")
client = Client(transport=transport, fetch_schema_from_transport=True)

## Overall Stats

### Countries

In [2]:
query = gql(
"""
{
  general {
    kivaStats {
      amountFunded
      numBorrowers,
      numCountries,
      numLenders,
      repaymentRate,
      sectors {
        name
      }
    }
  }
}
""")
result = client.execute(query)

pprint(result)

{'general': {'kivaStats': {'amountFunded': 1935198470,
                           'numBorrowers': 4752764,
                           'numCountries': 80,
                           'numLenders': 2181437,
                           'repaymentRate': 96.39128928000001,
                           'sectors': [{'name': 'Agriculture'},
                                       {'name': 'Transportation'},
                                       {'name': 'Services'},
                                       {'name': 'Clothing'},
                                       {'name': 'Health'},
                                       {'name': 'Retail'},
                                       {'name': 'Manufacturing'},
                                       {'name': 'Arts'},
                                       {'name': 'Housing'},
                                       {'name': 'Food'},
                                       {'name': 'Wholesale'},
                                       {'name': 'Constructio

In [5]:
query = gql(
"""
{
  lend {
    countryFacets {
      country {
        name
        isoCode
        region
        ppp
        numLoansFundraising
        fundsLentInCountry
      },
      count
    }
  }
}

"""
)
countries = client.execute(query)
countries = countries['lend']['countryFacets']
countries = pd.json_normalize(countries)
countries

Unnamed: 0,count,country.name,country.isoCode,country.region,country.ppp,country.numLoansFundraising,country.fundsLentInCountry
0,212.0,Samoa,WS,Oceania,"$5,200",212,22483295.0
1,87.0,Mozambique,MZ,Africa,"$1,200",87,9177365.0
2,27.0,Dominican Republic,DO,North America,"$9,700",27,11925295.0
3,163.0,Bolivia,BO,South America,"$5,500",164,67485655.0
4,131.0,Guatemala,GT,Central America,"$5,300",131,41159005.0
...,...,...,...,...,...,...,...
74,43.0,Vanuatu,VU,Oceania,"$2,500",43,1245415.0
75,,Mongolia,MN,Asia,"$5,900",0,15691700.0
76,,Guam,GU,Oceania,"$28,700",0,24300.0
77,,Virgin Islands,VI,North America,"$14,500",0,17500.0


In [6]:
import plotly.express as px
import country_converter as coco

countries['country.iso3'] = coco.convert(countries['country.isoCode'], to='ISO3')

fig = px.choropleth(countries, locations='country.iso3', color='count', hover_name='country.name',
                    projection='natural earth',
                    title='Number of Projects by Country')
fig.show()

### Tags

In [5]:
query = gql(
"""
{
  lend {
    tag {
      name
    }
  }
}
"""
)
tags = client.execute(query)
tags = tags['lend']['tag']
tags = [i['name'] for i in tags]
print(len(tags))
pprint(tags)

76
['volunteer_pick',
 'volunteer_like',
 'user_like',
 'user_favorite',
 '#First Loan',
 '#Woman-Owned Business',
 '#Tourism',
 '#Sustainable Ag',
 '#Eco-friendly',
 '#Vegan',
 '#Animals',
 '#Widowed',
 '#Elderly',
 '#Single',
 '#Married',
 '#Parent',
 '#Single Parent',
 '#Schooling',
 '#Pre-disbursed',
 '#Post-disbursed',
 '#Inspiring Story',
 '#Unique',
 '#Interesting Photo',
 '#Powerful Story',
 '#Team Guys Holding Fish',
 '#Fabrics',
 '#Health and Sanitation',
 '#Repeat Borrower',
 '#Refugee',
 '#Job Creator',
 '#Supporting Family',
 '#Orphan',
 '#Low-profit FP',
 '#Hidden Gem',
 '#Biz Durable Asset',
 '#Trees',
 '#Female Education',
 '#Technology',
 '#Repair Renew Replace',
 '#US immigrant',
 'reserved_disaster_relief_covid',
 'reserved_crisis_support_loan',
 '#US Black-Owned Business',
 '',
 '#Latinx/Hispanic-Owned Business',
 'cow',
 'IT Cosmetics',
 'beauty',
 'Turo',
 'GoDaddy',
 '#BIPOC-owned Business',
 '#Umpqua',
 '#US Environmental Loan',
 '#CommunityImpact',
 '#FamilyImp

### Sectors

In [8]:
query = gql(
"""
{
  lend {
    sector {
      name
    }
  }
}
"""
)
sectors = client.execute(query)
sectors = sectors['lend']['sector']
sectors = [i['name'] for i in sectors]
pprint(sectors)

['Agriculture',
 'Transportation',
 'Services',
 'Clothing',
 'Health',
 'Retail',
 'Manufacturing',
 'Arts',
 'Housing',
 'Food',
 'Wholesale',
 'Construction',
 'Education',
 'Personal Use',
 'Entertainment']


## Details data of loans


### Number of projects

In [3]:
query = gql(
"""
{
lend {
		loans(filters: {distributionModel: both, status: all}) {
      totalCount
    }
  }
}
"""
)
loans_count = client.execute(query)
loans_count = loans_count['lend']['loans']['totalCount']
loans_count

2562598

### Fetch all datas
We have to download all `2558335` projects in order to gain more insights. In the following part, I demonstrate how to fetch only 10 loans, just to show what we could do.

In [9]:
# Let's query a simple Loan (which is a project)
query = gql(
"""
query FetchLoans($offset: Int!, $limit: Int!)
{
  lend {
    loans(offset: $offset, limit: $limit, filters: {distributionModel: both, status: all}, sortBy: newest) {
      totalCount
      values {
        activity {
          id,
          name
        }
        anonymizationLevel
        borrowerCount
        borrowers {
          id,
          borrowedAmount,
          firstName,
          gender,
          isPrimary,
          pictured
        }
        dafEligible
        delinquent
        description
        descriptionInOriginalLanguage
        disbursalDate
        distributionModel
        endorser {
          id,
          image {
            id,
            url(presetSize: default),
          },
          inviteeCount,
          lenderPage {
            city
            state
            country {
              name
              isoCode
              region
              ppp
              numLoansFundraising
              fundsLentInCountry
            }
            loanBecause
            occupation
            otherInfo
            url
            whereabouts
          }
          loanCount
          memberSince
          name
          publicId
        }
        fundraisingDate
        gender
        geocode {
          city
          state
          country {
            name
            isoCode
            region
            ppp
            numLoansFundraising
            fundsLentInCountry
          }
          postalCode
          latitude
          longitude
        }
        hasCurrencyExchangeLossLenders
        id
        image {
          id,
          url(presetSize: default)
        }
        isMatchable
        inPfp
        loanAmount
        loanFundraisingInfo {
          fundedAmount
          isExpiringSoon
          reservedAmount
        }
        lenderRepaymentTerm
        matcherAccountId
        matcherName
        matchRatio
        matchingText
        name
        originalLanguage {
          id
          isActive
          isoCode
          name
        }
        minNoteSize
        paidAmount
        pfpMinLenders
        plannedExpirationDate
        previousLoanId
        raisedDate
        researchScore
        repaymentInterval
        sector {
          id
          name
        }
        status
        tags
        terms {
          currency
          currencyFullName
          disbursalAmount
          disbursalDate
          expectedPayments {
            amount
            dueToKivaDate
            effectiveDate
            localAmount
          }
          loanAmount
          lenderRepaymentTerm
          lossLiabilityCurrencyExchange
          lossLiabilityNonpayment
          flexibleFundraisingEnabled
        }
        use
        userProperties {
          favorited
          lentTo
          subscribed
          promoEligible
          amountInBasket
        }
        video {
          thumbnailImageId
          youtubeId
        }
        whySpecial
      }
    }
  }
}
"""
)

loans = client.execute(query, variable_values={
    "offset": 1000000,
    "limit": 10
})
pprint(loans)


{'lend': {'loans': {'totalCount': 2558347,
                    'values': [{'activity': {'id': 67,
                                             'name': 'Food Production/Sales'},
                                'anonymizationLevel': 'none',
                                'borrowerCount': 1,
                                'borrowers': [{'borrowedAmount': '1000.00',
                                               'firstName': 'Ana Cecilia',
                                               'gender': 'female',
                                               'id': 6094078,
                                               'isPrimary': True,
                                               'pictured': True}],
                                'dafEligible': True,
                                'delinquent': False,
                                'description': 'Ana is 51 years old. She '
                                               'studied until the 3rd grade, '
                                    

Some columns of interested are

| column name | meaning |
|-------------|---------|
| `loanAmount` | The amount of this loan, as shown to lenders |
| `fundedAmount` | The amount of the loan that has been purchased by lenders |
| `raisedDate` | When the loan became raised, e.g. fully funded. Same as funded_date in v1 |
| `fundraisingDate` | When the loan started fundraising on Kiva. Same as posted_date in v1 |
| `plannedExpirationDate` | When the loan will expire if it is not fully funded |
| `disbursalDate` | The date on which the partner disbursed and/or intends to disburse the loan to the borrower |

those meanings are taken from Kiva's GraphQL Schema.

In [10]:
df = pd.json_normalize(loans['lend']['loans']['values'])
df['raisedDate'] = pd.to_datetime(df['raisedDate'])
df['fundraisingDate'] = pd.to_datetime(df['fundraisingDate'])
df['loanFundraisingInfo.fundedAmount'] = df['loanFundraisingInfo.fundedAmount'].astype(float)
df[['loanAmount', 'loanFundraisingInfo.fundedAmount', 'raisedDate', 'fundraisingDate', 'plannedExpirationDate', 'disbursalDate', ]]

Unnamed: 0,loanAmount,loanFundraisingInfo.fundedAmount,raisedDate,fundraisingDate,plannedExpirationDate,disbursalDate
0,1000.0,1000.0,2018-09-12 21:14:03+00:00,2018-08-16 20:20:06+00:00,2018-09-15T20:20:06Z,2018-08-10T07:00:00Z
1,325.0,325.0,2018-08-17 10:37:38+00:00,2018-08-16 20:20:02+00:00,2018-09-15T20:20:02Z,2018-08-14T07:00:00Z
2,150.0,150.0,2018-08-16 21:19:23+00:00,2018-08-16 20:00:07+00:00,2018-09-15T20:00:07Z,2018-07-17T07:00:00Z
3,75.0,75.0,2018-08-17 00:03:42+00:00,2018-08-16 20:00:06+00:00,2018-09-15T20:00:06Z,2018-07-15T07:00:00Z
4,325.0,325.0,2018-08-18 11:53:49+00:00,2018-08-16 20:00:06+00:00,2018-09-15T20:00:06Z,2018-07-15T07:00:00Z
5,175.0,175.0,2018-08-18 12:30:15+00:00,2018-08-16 20:00:06+00:00,2018-09-15T20:00:06Z,2018-07-15T07:00:00Z
6,900.0,900.0,2018-08-21 12:10:14+00:00,2018-08-16 20:00:06+00:00,2018-09-15T20:00:06Z,2018-08-18T07:00:00Z
7,3475.0,3475.0,2018-09-15 19:58:12+00:00,2018-08-16 20:00:06+00:00,2018-09-15T20:00:06Z,2018-08-03T07:00:00Z
8,700.0,700.0,2018-08-21 21:40:10+00:00,2018-08-16 20:00:05+00:00,2018-09-15T20:00:05Z,2018-07-24T07:00:00Z
9,400.0,400.0,2018-08-19 22:52:49+00:00,2018-08-16 20:00:05+00:00,2018-09-15T20:00:05Z,2018-07-30T07:00:00Z


In [11]:
# Calculate the amount of money collected per day
df['funding_duration'] = df['raisedDate'] - df['fundraisingDate']
df['funding_duration_days'] = df['funding_duration'].dt.total_seconds() / (24 * 60 * 60)
df['collection_speed'] = df['loanFundraisingInfo.fundedAmount'] / df['funding_duration_days']
df['collection_speed']

0      36.985716
1     545.708955
2    2724.978974
3     443.349754
4     195.511861
5     103.697303
6     192.566764
7     115.838428
8     138.080616
9     128.207031
Name: collection_speed, dtype: float64