In [1]:
import os
import json
import tempfile
import requests
import urllib
import pygsheets
import numpy as np
import pandas as pd
import asyncio
from gql import Client, gql
from gql.transport.aiohttp import AIOHTTPTransport
from string import Template
from utilities import write_base64str_obj_to_file

In [2]:
# Select your transport with a defined url endpoint
transport = AIOHTTPTransport(url="https://gis-api.aiesec.org/graphql/?access_token=7789a122c78d710a724f3cd2a7d8daf809b467d46cf63eafa76b2ae87a32f035")

async def getData():
    # Create a GraphQL client using the defined transport
    async with Client(transport=transport, fetch_schema_from_transport=True) as session:

        # Provide a GraphQL query
        query = gql(
            """
            query getApplicationList ($limit: Int, $start_date: DateTime, $end_date: DateTime){
            allOpportunityApplication(per_page: $limit, filters: {created_at: {from: $start_date, to: $end_date}}) {
                data {
                id
                status
                created_at
                date_matched
                date_pay_by_cash
                date_approved
                date_realized
                experience_start_date
                experience_end_date
                date_approval_broken
                nps_response_completed_at
                updated_at
                person {
                    id
                    full_name
                    home_mc {
                    name
                    }
                    home_lc {
                    name
                    }
                }
                host_lc {
                    name
                }
                host_mc: home_mc {
                    name
                }
                opportunity {
                    id
                    created_at
                    title
                    duration
                    sub_product {
                    name
                    }
                    programme {
                    short_name_display
                    }
                }
                standards {
                    option
                }
                }
            }
            }
        """
        )

        params = {	"mc_id": [518],
                    "start_date": "2021-01-01",
                    "end_date": "",
                    "limit": 1000
                }

        # Execute the query on the transport
        results = await session.execute(query, variable_values=params)
        # print(result)
        return results

results = await getData()

### Build a dataframe out of it

In [3]:

# Reduce the dict by 3 Levels
results = results['allOpportunityApplication']['data']

#  Flatten dictionary and compress keys
results = pd.json_normalize(results, sep='_')

# results.replace([np.NaN, "", "-"], '', inplace=True)

## The dataset

In [4]:
results.head()

Unnamed: 0,id,status,created_at,date_matched,date_pay_by_cash,date_approved,date_realized,experience_start_date,experience_end_date,date_approval_broken,...,person_home_lc_name,host_lc_name,host_mc_name,opportunity_id,opportunity_created_at,opportunity_title,opportunity_duration,opportunity_sub_product_name,opportunity_programme_short_name_display,opportunity_sub_product
0,6162477,open,2021-05-12T14:49:44Z,,,,,,,,...,Awal,MC France,France,1274284,2021-05-04T13:30:34Z,HR Projects Manager,,Other,GTa,
1,6162430,matched,2021-05-12T13:43:03Z,2021-05-14T08:54:53Z,,,,,,,...,Dilmun,COLOMBO CENTRAL,Sri Lanka,1269589,2021-01-21T03:11:15Z,Forte - Empowering Sri Lankan Girls & Women,,,GV,
2,6161918,open,2021-05-11T17:38:19Z,,,,,,,,...,NSBM,Tylos,Bahrain,1273447,2021-04-12T20:21:06Z,Children's Intercultural Environment Advocate,,,GV,
3,6159525,open,2021-05-07T15:21:19Z,,,,,,,,...,Awal,SAMSUN,Turkey,1267025,2020-10-27T15:39:00Z,Practice Foreigner Language | Discuss World Is...,,,GV,
4,6158734,rejected,2021-05-06T16:32:17Z,,,,,,,,...,Awal,SAO CARLOS,Brazil,1273545,2021-04-14T23:12:55Z,Assistant English Teacher at Influx,,,GTe,


In [5]:
results.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 94 entries, 0 to 93
Data columns (total 26 columns):
 #   Column                                    Non-Null Count  Dtype  
---  ------                                    --------------  -----  
 0   id                                        94 non-null     object 
 1   status                                    94 non-null     object 
 2   created_at                                94 non-null     object 
 3   date_matched                              1 non-null      object 
 4   date_pay_by_cash                          0 non-null      object 
 5   date_approved                             0 non-null      object 
 6   date_realized                             0 non-null      object 
 7   experience_start_date                     0 non-null      object 
 8   experience_end_date                       0 non-null      object 
 9   date_approval_broken                      0 non-null      object 
 10  nps_response_completed_at               

### Extract Bahrain LC Names

Create new columns for Easy Reading and Indices
* LC
* LC_ID*
* Department
* Partner_MC
* Partner_LC

*-Probably do need it for verification

In [23]:
# Create new multi-indices for grouping
new_cols = ['dept_prefix', 'lc', 'partner_mc', 'partner_lc']

def generate_new_fields(row):

    if row['person_home_mc_name'] == 'Bahrain':
        values = ['o', row['person_home_lc_name'],
                  row['host_mc_name'], row['host_lc_name']]
    else:
        values = ['i', row['host_lc_name'],
                  row['person_home_mc_name'], row['person_home_lc_name']]

    return dict(zip(new_cols, values))

results[new_cols] = results.apply(lambda row: generate_new_fields(row), axis=1, result_type='expand')
results[new_cols].head(10)

Unnamed: 0,dept_prefix,lc,partner_mc,partner_lc
0,o,Awal,France,MC France
1,o,Dilmun,Sri Lanka,COLOMBO CENTRAL
2,i,Tylos,Sri Lanka,NSBM
3,o,Awal,Turkey,SAMSUN
4,o,Awal,Brazil,SAO CARLOS
5,o,Tylos,Turkey,IZMIR
6,o,Dilmun,Sri Lanka,COLOMBO CENTRAL
7,o,Dilmun,Turkey,ANKARA
8,o,(Closed ASU),Sri Lanka,COLOMBO CENTRAL
9,o,(Closed ASU),Turkey,ESKISEHIR


In [33]:
# Create a new field 'department' with incoming and outgoing labels as prefix
results['department'] = results.dept_prefix + results.opportunity_programme_short_name_display
results.drop('opportunity_programme_short_name_display', inplace=True, axis=1)
results['department']

AttributeError: 'DataFrame' object has no attribute 'opportunity_programme_short_name_display'

### Shorten datetime stamps

In [46]:

date_cols = ['id', 'created_at', 'date_matched', 'date_approved', 'date_realized', 'updated_at']
multi_indices = ['lc', 'department', 'partner_mc', 'partner_lc']

perf_table = results[date_cols + multi_indices]
perf_table[date_cols] = perf_table[date_cols].applymap(lambda x: x[:-10], na_action='ignore')
perf_table


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


Unnamed: 0,id,created_at,date_matched,date_approved,date_realized,updated_at,lc,department,partner_mc,partner_lc
0,,2021-05-12,,,,2021-05-12,Awal,oGTa,France,MC France
1,,2021-05-12,2021-05-14,,,2021-05-14,Dilmun,oGV,Sri Lanka,COLOMBO CENTRAL
2,,2021-05-11,,,,2021-05-11,Tylos,iGV,Sri Lanka,NSBM
3,,2021-05-07,,,,2021-05-07,Awal,oGV,Turkey,SAMSUN
4,,2021-05-06,,,,2021-05-06,Awal,oGTe,Brazil,SAO CARLOS
...,...,...,...,...,...,...,...,...,...,...
89,,2021-02-21,,,,2021-02-28,(Closed RCSI),oGV,Brazil,SAO CARLOS
90,,2021-02-08,,,,2021-04-09,Awal,oGTa,Malaysia,"Taylor's University (TU) , Subang Jaya"
91,,2021-02-06,,,,2021-02-07,Dilmun,oGV,Egypt,AAST In CAIRO
92,,2021-02-05,,,,2021-04-06,Dilmun,oGV,Egypt,AAST In CAIRO


In [50]:
perf_table.groupby(["created_at", *multi_indices])['id'].count().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,id
created_at,lc,department,partner_mc,partner_lc,Unnamed: 5_level_1
2021-02-02,Tylos,oGTa,Germany,BONN,1
2021-02-05,Dilmun,oGV,Egypt,AAST In CAIRO,1
2021-02-06,Dilmun,oGV,Egypt,AAST In CAIRO,1
2021-02-08,Awal,oGTa,Malaysia,"Taylor's University (TU) , Subang Jaya",1
2021-02-21,(Closed RCSI),oGV,Brazil,SAO CARLOS,1
2021-02-28,(Closed RCSI),oGV,Turkey,KONYA,1
2021-03-02,Awal,oGTa,Egypt,Cairo University,1
2021-03-02,Awal,oGTa,Egypt,GUC,1
2021-03-02,Awal,oGTa,Egypt,MIU,1
2021-03-12,Awal,oGTa,United States,NEW YORK CITY,1


In [59]:
def generate_time_series(table: pd.DataFrame, other_indices, date_col: str, metric_name: str):
    time_series = table.groupby([date_col, *other_indices])['id'].count().to_frame()
    time_series.rename(columns={"id": metric_name}, inplace=True)
    time_series.index.names = ['date', *other_indices]
    return time_series


apps = generate_time_series(perf_table, multi_indices, "created_at", "application")
acc = generate_time_series(perf_table, multi_indices, "date_matched", "accepted")



apps.join([acc], how='outer')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,application,accepted
date,lc,department,partner_mc,partner_lc,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-02-02,Tylos,oGTa,Germany,BONN,1.0,
2021-02-05,Dilmun,oGV,Egypt,AAST In CAIRO,1.0,
2021-02-06,Dilmun,oGV,Egypt,AAST In CAIRO,1.0,
2021-02-08,Awal,oGTa,Malaysia,"Taylor's University (TU) , Subang Jaya",1.0,
2021-02-21,(Closed RCSI),oGV,Brazil,SAO CARLOS,1.0,
2021-02-28,(Closed RCSI),oGV,Turkey,KONYA,1.0,
2021-03-02,Awal,oGTa,Egypt,Cairo University,1.0,
2021-03-02,Awal,oGTa,Egypt,GUC,1.0,
2021-03-02,Awal,oGTa,Egypt,MIU,1.0,
2021-03-12,Awal,oGTa,United States,NEW YORK CITY,1.0,
