# Kiva data schema overview

In [None]:
from pprint import pprint
import pandas as pd
from gql import gql, Client
from gql.transport.requests import RequestsHTTPTransport
from IPython.display import Markdown


# Kiva GraphQL endpoint that every query in this notebook is sent to.
KIVA_GRAPHQL_URL = "https://api.kivaws.org/graphql"

# HTTP transport plus a client that is asked to fetch the schema from that transport.
transport = RequestsHTTPTransport(url=KIVA_GRAPHQL_URL)
client = Client(fetch_schema_from_transport=True, transport=transport)

## Overall Stats

In [None]:
# Platform-wide figures exposed under `general.kivaStats`.
KIVA_STATS_QUERY = """
{
  general {
    kivaStats {
      amountFunded
      numBorrowers,
      numCountries,
      numLenders,
      repaymentRate
    }
  }
}
"""

query = gql(KIVA_STATS_QUERY)
result = client.execute(query)

# Pretty-print the response exactly as the API returned it.
pprint(result)

## FAQ

### Sectors

> A sector is a broad category for a loan, e.g. Agriculture, Arts, Clothing. Sectors are subdivided further by activities.

In [None]:
# Every sector with the two fields requested: `id` and `name`.
SECTOR_QUERY = """
{
  lend {
    sector {
      id
      name
    }
  }
}
"""

query = gql(SECTOR_QUERY)
result = client.execute(query)

# Build a frame from the returned sector records and print it as a Markdown table.
sector_records = result["lend"]["sector"]
sector = pd.DataFrame(sector_records)
sector_md = sector.to_markdown(index=False)
print(sector_md)

### Activity

> A property of loan which is more descriptive than Sector. Every activity is within a sector. e.g. the 'Animal Sales' activity is within the 'Agriculture' sector. Note, some Activities have the same name as their parent Sector

In [None]:
# Every activity with the two fields requested: `id` and `name`.
ACTIVITY_QUERY = """
{
  lend {
    activity {
      id,
      name
    }
  }
}
"""

query = gql(ACTIVITY_QUERY)
result = client.execute(query)

# Build a frame from the returned activity records and print it as a Markdown table.
activity_records = result["lend"]["activity"]
activity = pd.DataFrame(activity_records)
activity_md = activity.to_markdown(index=False)
print(activity_md)
# To render the table inline instead of printing its Markdown source:
# display(Markdown(activity_md))

### Tags

> Loan properties which are attributed by lenders

In [None]:
# Every tag; the inline `#` notes inside the query text are GraphQL comments.
TAG_QUERY = """
{
  lend {
    tag {
      id, # Unique identifier for this tag
      name, # The name of the tag
      vocabularyId # Vocabulary id for the tag type
    }
  }
}
"""

query = gql(TAG_QUERY)
tags_response = client.execute(query)
tags = pd.DataFrame(tags_response["lend"]["tag"])

# Print the tags ordered by `vocabularyId` as a Markdown table.
tags_by_vocabulary = tags.sort_values(by=["vocabularyId"])
print(tags_by_vocabulary.to_markdown(index=False))

### Theme

In [None]:
# Names of the loan theme filters.
THEME_QUERY = """
{
  lend {
    loanThemeFilter {
      name
    }
  }
}
"""

query = gql(THEME_QUERY)
themes_response = client.execute(query)
themes = pd.DataFrame(themes_response["lend"]["loanThemeFilter"])

# Print the themes in alphabetical order as a Markdown table.
themes_by_name = themes.sort_values(by=["name"])
print(themes_by_name.to_markdown(index=False))

### Country

In [None]:
# Country facets: the nested `country` record plus a `count` per facet.
COUNTRY_FACETS_QUERY = """
{
  lend {
    countryFacets {
      country {
        name
        isoCode
        region
        ppp
        numLoansFundraising
        fundsLentInCountry
      },
      count
    }
  }
}

"""

query = gql(COUNTRY_FACETS_QUERY)
country_response = client.execute(query)
country_facets = country_response["lend"]["countryFacets"]

# Flatten the nested `country` object into dotted columns (e.g. `country.name`).
countries = pd.json_normalize(country_facets)
countries

In [None]:
import country_converter as coco
import plotly.express as px

# Add ISO3 country codes; the map below keys its locations on `country.iso3`.
iso3_codes = coco.convert(countries["country.isoCode"], to="ISO3")
countries["country.iso3"] = iso3_codes

# World map coloured by the facet `count` of each country.
choropleth_options = dict(
    locations="country.iso3",
    color="count",
    hover_name="country.name",
    projection="natural earth",
    title="Number of Projects by Country",
)
fig = px.choropleth(countries, **choropleth_options)
fig.show()

## Detailed data of projects


### Number of projects

In [None]:
# Total number of loans across both distribution models and every status.
LOAN_COUNT_QUERY = """
{
lend {
		loans(filters: {distributionModel: both, status: all}) {
      totalCount
    }
  }
}
"""

query = gql(LOAN_COUNT_QUERY)
count_response = client.execute(query)
loans_count = count_response["lend"]["loans"]["totalCount"]
loans_count

### Example data

Download a single project as an example.

In [None]:
# Paginated query for loans (a loan is a project), newest first.
# `$offset` and `$limit` are supplied as variables when the query is executed.
LOAN_DETAIL_QUERY = """
query FetchLoans($offset: Int!, $limit: Int!)
{
  lend {
    loans(offset: $offset, limit: $limit, filters: {distributionModel: both, status: all}, sortBy: newest) {
      totalCount
      values {
        activity {
          id,
          name
        }
        anonymizationLevel
        borrowerCount
        borrowers {
          id,
          borrowedAmount,
          firstName,
          gender,
          isPrimary,
          pictured
        }
        dafEligible
        delinquent
        description
        descriptionInOriginalLanguage
        disbursalDate
        distributionModel
        endorser {
          id,
          image {
            id,
            url(presetSize: default),
          },
          inviteeCount,
          lenderPage {
            city
            state
            country {
              name
              isoCode
              region
              ppp
              numLoansFundraising
              fundsLentInCountry
            }
            loanBecause
            occupation
            otherInfo
            url
            whereabouts
          }
          loanCount
          memberSince
          name
          publicId
        }
        fundraisingDate
        gender
        geocode {
          city
          state
          country {
            name
            isoCode
            region
            ppp
            numLoansFundraising
            fundsLentInCountry
          }
          postalCode
          latitude
          longitude
        }
        hasCurrencyExchangeLossLenders
        id
        image {
          id,
          url(presetSize: default)
        }
        isMatchable
        inPfp
        loanAmount
        loanFundraisingInfo {
          fundedAmount
          isExpiringSoon
          reservedAmount
        }
        lenderRepaymentTerm
        matcherAccountId
        matcherName
        matchRatio
        matchingText
        name
        originalLanguage {
          id
          isActive
          isoCode
          name
        }
        minNoteSize
        paidAmount
        pfpMinLenders
        plannedExpirationDate
        previousLoanId
        raisedDate
        researchScore
        repaymentInterval
        sector {
          id
          name
        }
        status
        tags
        terms {
          currency
          currencyFullName
          disbursalAmount
          disbursalDate
          expectedPayments {
            amount
            dueToKivaDate
            effectiveDate
            localAmount
          }
          loanAmount
          lenderRepaymentTerm
          lossLiabilityCurrencyExchange
          lossLiabilityNonpayment
          flexibleFundraisingEnabled
        }
        use
        userProperties {
          favorited
          lentTo
          subscribed
          promoEligible
          amountInBasket
        }
        video {
          thumbnailImageId
          youtubeId
        }
        whySpecial
        lendingActions(offset: 0, limit: 2) {
          totalCount
          values {
            lender {
              id
              name
              publicId
            }
            shareAmount
            teams
            latestSharePurchaseDate
          }
        }
      }
    }
  }
}
"""

query = gql(LOAN_DETAIL_QUERY)

# Pagination window for the example: a single loan at a fixed offset.
EXAMPLE_LOAN_OFFSET = 2000000
EXAMPLE_LOAN_LIMIT = 1

loans_response = client.execute(
    query,
    variable_values={"offset": EXAMPLE_LOAN_OFFSET, "limit": EXAMPLE_LOAN_LIMIT},
)

# Flatten the nested loan records into dotted columns.
loans = pd.json_normalize(loans_response["lend"]["loans"]["values"])
loans

In [None]:
# The two description fields are printed as their own table, after all the others.
long_text_fields = ["description", "descriptionInOriginalLanguage"]

# Transpose so each field becomes a row: column `field` plus the example value.
ldisplay = loans.T.reset_index().rename(columns={"index": "field", 0: "Example Value"})
is_long_text = ldisplay["field"].isin(long_text_fields)

# `.replace("  ", "")` removes every pair of consecutive spaces from the rendered table.
str1 = ldisplay[~is_long_text].to_markdown(index=False).replace("  ", "")
print(str1)
str2 = ldisplay[is_long_text].to_markdown(index=False).replace("  ", "")
print(str2)

Some columns of interest are:

| column name | meaning |
|-------------|---------|
| `loanAmount` | The amount of this loan, as shown to lenders |
| `fundedAmount` | The amount of the loan that has been purchased by lenders |
| `raisedDate` | When the loan became raised, e.g. fully funded. Same as funded_date in v1 |
| `fundraisingDate` | When the loan started fundraising on Kiva. Same as posted_date in v1 |
| `plannedExpirationDate` | When the loan will expire if it is not fully funded |
| `disbursalDate` | The date on which the partner disbursed and/or intends to disburse the loan to the borrower |

These meanings are taken from Kiva's GraphQL schema.

In [None]:
# `loans` is already the flattened frame produced by `pd.json_normalize` in the
# example-data cell, so it no longer has a "lend" key; indexing it as the raw
# response raised KeyError. Derive `df` from it directly. `assign` returns a new
# frame, so `loans` itself is left untouched.
df = loans.assign(
    raisedDate=pd.to_datetime(loans["raisedDate"]),
    fundraisingDate=pd.to_datetime(loans["fundraisingDate"]),
    # The dotted column name is not a valid keyword, hence the dict unpacking.
    **{"loanFundraisingInfo.fundedAmount": loans["loanFundraisingInfo.fundedAmount"].astype(float)},
)

# Show only the amount and date columns described in the table above.
columns_of_interest = [
    "loanAmount",
    "loanFundraisingInfo.fundedAmount",
    "raisedDate",
    "fundraisingDate",
    "plannedExpirationDate",
    "disbursalDate",
]
df[columns_of_interest]

In [None]:
# Amount of money collected per day of fundraising.
SECONDS_PER_DAY = 24 * 60 * 60

# Elapsed time between the start of fundraising and the loan being raised.
funding_duration = df["raisedDate"] - df["fundraisingDate"]
df["funding_duration"] = funding_duration
df["funding_duration_days"] = funding_duration.dt.total_seconds() / SECONDS_PER_DAY

# Funded amount divided by the fundraising duration in days.
df["collection_speed"] = df["loanFundraisingInfo.fundedAmount"] / df["funding_duration_days"]
df["collection_speed"]