# Meeting Log Summary

The aim of this notebook is to generate three CSV files that will be used to display the Dashboard on Google Data Studio:
- meeting_group_march_sept.csv (Meetings duration)
- group_by_meetingCode.csv (Participants punctuality)
- mentors.csv (Mentors geolocation) <br>

Link to the Dashboard : https://datastudio.google.com/reporting/1df8411a-0c41-41ca-986b-0ba98b119932/page/p_bw2m1tmnpc
 

## Data Input

Two files are needed for the creation of the Dashboard:
- calendar.csv (Run code below which need credentials file *service-acc.json*)
- meetlogs.csv (Already provided)

NB: Make sure the files (notebook + files) are located in the same folder.

In [2]:
from google.oauth2 import service_account
from googleapiclient.discovery import build
import pandas as pd
import os

In [3]:
class google_apis:
    __webdev_cred = ''
    __mentor_cred = ''

    def __init__(self):
        scopes = [
            'https://www.googleapis.com/auth/calendar',
            'https://www.googleapis.com/auth/gmail.compose',
            'https://www.googleapis.com/auth/admin.directory.user',
            'https://www.googleapis.com/auth/admin.directory.group',
        ]
        SERVICE_ACCOUNT_FILE = os.path.join("", "service-acc.json")
        credentials = service_account.Credentials.from_service_account_file(
            'service-acc.json', scopes=scopes)
        self.__webdev_cred = credentials.with_subject(
            'webdevelopment@villagebookbuilders.org')
        self.__mentor_cred = credentials.with_subject(
            'mentor@villagebookbuilders.org')

    def calendar_data_arr(self):
        data_arr = [['Associated Calendar', 'Meet Link', 'Participants', 'Start Time', 'End Time', 'ID']]
        calendar_service = build('calendar', 'v3', credentials=self.__mentor_cred)
        ids = calendar_service.calendarList().list().execute()
        for calendar_list_entry in ids['items']:
            list = calendar_service.events().list(calendarId=calendar_list_entry['id']).execute()
            flag = True
            for event in list['items']:
                if 'hangoutLink' in event and 'attendees' in event:
                    data_arr.append([calendar_list_entry['summary'],
                                    event['hangoutLink'], event['attendees'], event['start'], event['end'], event['id']])
        df = pd.DataFrame(data_arr)
        df.to_csv('calendar.csv')

api_obj = google_apis()
api_obj.calendar_data_arr()
print('File calendar.csv created !')

calendar.csv created !


In [17]:
# Read the two CSV files
data_meetlogs = pd.read_csv('meetlogs.csv')
data_calendar = pd.read_csv('calendar.csv', header=1)

## First Page : Time spent in meetings

In [8]:
# # Select the columns of interest 
data = data_meetlogs[['Date', 'Meeting Code', 'Participant Identifier', 'Duration', 'Participant Name']]

# Modify the Date column format
data['Date'] = pd.to_datetime(data['Date'])
data['Date'] = data['Date'].dt.strftime('%m/%d/%Y')

# Not clear
data = data.groupby(by=["Meeting Code","Date", "Participant Name", "Participant Identifier"]).mean().reset_index()

# Add two new columns
username = 'mphightech|hightech1|hightech2|hightech3|hightech4|hightech.announcements|hightech.collaboration|mpchebosi| \
            chebosi1|chebosi2|chebosi3|chebosi.announcements|chebosi.collaboration|mpahero|ahero1|ahero2|ahero3|ahero4| \
            ahero5|ahero6|ahero.announcements|ahero.collaboration|mukono1|mukono2|mukono.announcements|mukono.collaboration| \
            sanlazaro|sanlazaro1|sanlazaro2|sanlazaro3|sanlazaro4|sanlazaro5|sanlazaro6|sanlazaro7|sakubunase2|sakubunase3| \
            mpadeiso|adeiso1|adeiso2|adeiso3|adeiso4|adeiso.announcements@villagebookbuilders.org|adeiso.collaboration@villagebookbuilders.org| \
            mpkadzakalowa|kadzakalowa1|kadzakalowa2|kadzakalowa3|kadzakalowa4|kadzakalowa5|kadzakalowa6|kadzakalowa7|kadzakalowa8| \
            kadzakalowa9|kadzakalowa10|mpcharitycentre|charitycentre1|charitycentre2|charitycentre3|charitycentre4|charitycentre5| \
            charitycentre6|charitycentre7|charitycentre8|charitycentreannouncements|charitycentre.collab|mpjollyland|jollyland1| \
            jollyland2|jollyland3|jollyland4|jollyland.annoucements|jollyland.collaboration|mpmaragoli|maragoli1|maragoli2|maragoli3| \
            maragoli4|maragoli.announcements|maragoli.collaboration|mpyocreek|yocreek1|yocreek2|yocreek3|yocreek4|yocreek5|yocreek6| \
            yocreek7|yocreek8|yocreek9|yocreek10|mpvima|vima1|vima2|vima3|vima4|mpbiwi|biwi1|biwi2|biwi3|biwiannouncements@villagebookbuilders.org| \
            biwi.collaboration@villagebookbuilders.org'
sub = "villagementors.org"
data['is_mentee']= data["Participant Identifier"].str.contains(username)
data['is_mentor']= data["Participant Identifier"].str.contains(sub)

# Attribute the role of each participant
def identify_type(is_mentee,is_mentor):
    if (is_mentee == True):
        return "Mentee"
    if (is_mentor == True):
        return "Mentor"
    else:
        return "Staff"

data['Identify_type'] = data.apply(lambda x: identify_type(x.is_mentee,x.is_mentor), axis=1)

# Convert new dataframe to csv file
data.to_csv('meeting_group_march_sept.csv')
print("File meeting_group_march_sept.csv created !")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


meeting_group_march_sept.csv created !


## Second Page : Participants punctuality 

In [102]:
# Preprocess calendar.csv
data_c = data_calendar[['Meet Link', 'Start Time', 'End Time']]
data_c['Meeting Code'] = data_c['Meet Link'].str.strip().str[-12:].str.split('-').str.join("").str.upper()
data_c['startDateTime'] = data_c['Start Time'].apply(lambda x: eval(x).get('dateTime'))
data_c['endDateTime'] = data_c['End Time'].apply(lambda x: eval(x).get('dateTime'))
data_c['Date_format'] = data_c['startDateTime'].str.strip().str[:10]
data_c = data_c[['Date_format', 'Meeting Code', 'startDateTime', 'endDateTime']]

# Preprocess meetlogs.csv
data_m = data_meetlogs[['Date', 'Meeting Code', 'Participant Identifier', 'Duration', 'Participant Name']]
data_m['Date_format'] = pd.to_datetime(data_m['Date']).dt.strftime('%Y-%m-%d')

# Join the two csv files
data_merged = pd.merge(data_m, data_c, on=['Date_format','Meeting Code'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[r

In [103]:
data_merged

Unnamed: 0,Date,Meeting Code,Participant Identifier,Duration,Participant Name,Date_format,startDateTime,endDateTime
0,"Sep 16, 2021, 10:29:30 AM GMT-4",QZVXBGRCRA,nupur.kumar@villagementors.org,1635.0,nupur kumar,2021-09-16,2021-09-16T10:00:00-04:00,2021-09-16T11:00:00-04:00
1,"Sep 15, 2021, 10:57:42 AM GMT-4",QSBEDURTCA,mpcharitycentre@villagebookbuilders.org,412.0,"Charity Centre, Zambia VBB Mentor Program",2021-09-15,2021-09-15T10:00:00-04:00,2021-09-15T11:00:00-04:00
2,"Sep 15, 2021, 10:57:39 AM GMT-4",QSBEDURTCA,mentor@villagebookbuilders.org,3513.0,Mentor Program,2021-09-15,2021-09-15T10:00:00-04:00,2021-09-15T11:00:00-04:00
3,"Sep 15, 2021, 10:57:37 AM GMT-4",QSBEDURTCA,jaron.quirante@villagementors.org,3528.0,jaron quirante,2021-09-15,2021-09-15T10:00:00-04:00,2021-09-15T11:00:00-04:00
4,"Sep 15, 2021, 10:50:36 AM GMT-4",QSBEDURTCA,mpcharitycentre@villagebookbuilders.org,247.0,"Charity Centre, Zambia VBB Mentor Program",2021-09-15,2021-09-15T10:00:00-04:00,2021-09-15T11:00:00-04:00
...,...,...,...,...,...,...,...,...
1684,"Mar 24, 2021, 6:48:22 AM GMT-4",TXHIPWSYIV,tristan.henning@villagementors.org,2792.0,tristan henning,2021-03-24,2021-03-24T06:00:00-04:00,2021-03-24T07:00:00-04:00
1685,"Mar 23, 2021, 6:39:40 AM GMT-4",KKYTKIOPYA,alondra.ramirez0215@gmail.com,1388.0,Alondra Ramirez Herrera,2021-03-23,2021-03-23T06:00:00-04:00,2021-03-23T07:00:00-04:00
1686,"Mar 23, 2021, 6:15:47 AM GMT-4",KKYTKIOPYA,alondra.ramirez0215@gmail.com,599.0,Alondra Ramirez Herrera,2021-03-23,2021-03-23T06:00:00-04:00,2021-03-23T07:00:00-04:00
1687,"Mar 22, 2021, 9:54:47 AM GMT-4",XMSDJBXPGD,nupurkumar64@gmail.com,3311.0,Nupur Kumar,2021-03-22,2021-03-22T09:00:00-04:00,2021-03-22T10:00:00-04:00


## Third Page : Mentors geolocation 

In [9]:
# Select the columns of interest 
data = data_meetlogs[['Participant Identifier', 'Participant Name', 'Organizer Email', 'City', 'Country']]

# Select only rows where the Organizer Email is 'mentor@villagebookbuilders.org' (the remainings are supposed not to be related to the mentoring program) and drop the column
data_mp = data.loc[data['Organizer Email'] == 'mentor@villagebookbuilders.org']
data_mp = data_mp.drop(columns=['Organizer Email'])

# Find all the mentees (whose either Identifier starts with 'mp' or Name with 'Computer') et discard them from the dataframe
data_mentors = data_mp.loc[~(data['Participant Identifier'].str.startswith('mp', na=False) | data['Participant Name'].str.startswith('Computer', na=False))]

# Drop all duplicates from the Participant Identifier
data_mentors = data_mentors.drop_duplicates(subset=['Participant Identifier'])

# Convert filtered dataframe to csv file
data_mentors.to_csv('mentors.csv', index=False)
print("File mentors.csv created !")

File mentors.csv created !
