In [1]:
# Imports

import json
from datetime import datetime, timedelta
from urllib.parse import quote

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# 1. Data acquisition

def get_csv(csv, index):
    """Read a CSV file into a DataFrame indexed by one of its columns.

    Parameters
    ----------
    csv : str or path-like
        Location of the CSV file to read.
    index : str
        Name of the column that becomes the DataFrame index.

    Returns
    -------
    pandas.DataFrame
        The file contents with ``index`` set as the index.

    Raises
    ------
    FileNotFoundError
        If ``csv`` does not exist. The error is propagated instead of being
        returned as a value, so callers never receive an exception object
        where they expect a DataFrame.
    """
    return pd.read_csv(csv).set_index(index)


def get_students_and_katas(students_csv='input/students.csv',
                           katas_csv='input/katas.csv',
                           students_index='username',
                           katas_index='slug'):
    """Load the students table and the katas table.

    Each file is read with ``get_csv`` using the matching index column; the
    students file is read first, then the katas file.

    Parameters
    ----------
    students_csv, katas_csv : str
        Paths of the two input CSV files.
    students_index, katas_index : str
        Column used as the index of the corresponding table.

    Returns
    -------
    tuple
        ``(students, katas)`` in that order, as returned by ``get_csv``.
    """
    sources = ((students_csv, students_index), (katas_csv, katas_index))
    students_frame, katas_frame = (get_csv(path, key) for path, key in sources)
    return students_frame, katas_frame

# Load both input tables from their default locations, then keep the index
# labels (usernames and kata slugs) as plain lists for the loops further down.
df_students, df_katas = get_students_and_katas()
katas = list(df_katas.index)
students = list(df_students.index)

df_katas.head()
# df_katas
# df_students

Unnamed: 0_level_0,date,minutes
slug,Unnamed: 1_level_1,Unnamed: 2_level_1
regexp-fun-number-1-when-i-miss-few-days-of-gym,2019-02-01T16:20:21.241Z,60
deodorant-evaporator,2019-02-01T16:20:21.241Z,60
ordered-count-of-characters,2019-02-13T10:00:00.241Z,45
reverse-or-rotate,2019-02-15T13:00:00.241Z,60
the-deaf-rats-of-hamelin,2019-02-19T15:00:00.241Z,120


In [3]:
# 2. wrangle
def clean_katas(df):
    """Return a typed copy of the katas table with a ``limit`` column added.

    The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Katas table with a ``date`` column (parseable timestamps) and a
        ``minutes`` column (values convertible to ``int64``).

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` where ``minutes`` is ``int64``, ``date`` is a datetime
        column and ``limit`` equals ``date`` plus ``minutes`` minutes.
    """
    df_katas = df.copy()

    df_katas['minutes'] = df_katas['minutes'].astype('int64')

    # `infer_datetime_format` is deprecated in recent pandas and no longer
    # needed: consistent ISO-like strings are parsed without it.
    df_katas['date'] = pd.to_datetime(df_katas['date'])

    # Vectorized date arithmetic instead of a row-wise `apply`; the result is
    # already a datetime column, so no second `to_datetime` pass is required.
    df_katas['limit'] = df_katas['date'] + pd.to_timedelta(df_katas['minutes'], unit='min')

    return df_katas

# Replace the raw katas table with its cleaned version (typed `minutes` and
# `date` columns plus the computed `limit` column).
df_katas = clean_katas(df_katas)

df_katas.head()

Unnamed: 0_level_0,date,minutes,limit
slug,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
regexp-fun-number-1-when-i-miss-few-days-of-gym,2019-02-01 16:20:21.241000+00:00,60,2019-02-01 17:20:21.241000+00:00
deodorant-evaporator,2019-02-01 16:20:21.241000+00:00,60,2019-02-01 17:20:21.241000+00:00
ordered-count-of-characters,2019-02-13 10:00:00.241000+00:00,45,2019-02-13 10:45:00.241000+00:00
reverse-or-rotate,2019-02-15 13:00:00.241000+00:00,60,2019-02-15 14:00:00.241000+00:00
the-deaf-rats-of-hamelin,2019-02-19 15:00:00.241000+00:00,120,2019-02-19 17:00:00.241000+00:00


In [4]:


# Empty grid: one row per entry of the students index, one column per entry of
# the katas index. No values are filled in yet.
df = pd.DataFrame(index=df_students.index, columns=df_katas.index)


In [5]:
# https://www.codewars.com/api/v1/users/Livia Canet/code-challenges/completed
# https://www.codewars.com/kata/

def get_completed_by_user(user, host='https://www.codewars.com/api/v1/users/',
                          path='/code-challenges/completed', timeout=30):
    """Fetch the completed code challenges of a Codewars user.

    Parameters
    ----------
    user : str
        Codewars username (it may contain spaces or other characters that
        need escaping in a URL).
    host : str
        Base URL of the users endpoint, ending with ``/``.
    path : str
        Path appended after the username.
    timeout : float
        Seconds to wait for each HTTP request before giving up.

    Returns
    -------
    dict
        The decoded JSON response of the first page, with the ``data`` list
        extended by the entries of any further pages announced through
        ``totalPages``.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status (for example an unknown user).
    requests.Timeout
        If a request exceeds ``timeout``.
    """
    # Percent-encode the username so spaces or reserved characters such as
    # '/', '?' or '#' cannot change the structure of the URL.
    url = host + quote(user, safe='') + path

    def fetch_page(page):
        resp = requests.get(url, params={'page': page}, timeout=timeout)
        # Fail here with a clear HTTP error instead of returning an error
        # payload that only breaks later when 'data' is looked up.
        resp.raise_for_status()
        return resp.json()

    result = fetch_page(0)

    # The endpoint is paginated; without this loop older completions of very
    # active users would silently be missing.
    for page in range(1, result.get('totalPages') or 1):
        result['data'].extend(fetch_page(page)['data'])

    return result

In [6]:
# This takes a while: the API is queried once for every student.

# Dictionary k: v (username: result returned by the API)
completed = {u: get_completed_by_user(u) for u in df_students.index}

In [7]:
# Dictionary k: v (username: list of completed katas, taken from the 'data'
# key of each API response)
completed_katas_dict = {username: payload['data'] for username, payload in completed.items()}
completed_katas_dict

{'paulapr': [{'id': '5a03af9606d5b65ff7000009',
   'name': 'User class for Banking System',
   'slug': 'user-class-for-banking-system',
   'completedLanguages': ['python'],
   'completedAt': '2019-02-25T10:16:29.963Z'},
  {'id': '58ab2ed1acbab2eacc00010e',
   'name': 'Scraping: Get the Year a CodeWarrior Joined',
   'slug': 'scraping-get-the-year-a-codewarrior-joined',
   'completedLanguages': ['python'],
   'completedAt': '2019-02-20T16:32:41.854Z'},
  {'id': '598106cb34e205e074000031',
   'name': 'The Deaf Rats of Hamelin',
   'slug': 'the-deaf-rats-of-hamelin',
   'completedLanguages': ['python'],
   'completedAt': '2019-02-19T22:09:27.506Z'},
  {'id': '5c2b4182ac111c05cf388858',
   'name': 'Read the time',
   'slug': 'read-the-time',
   'completedLanguages': ['python'],
   'completedAt': '2019-02-19T16:39:55.303Z'},
  {'id': '56b5afb4ed1f6d5fb0000991',
   'name': 'Reverse or rotate?',
   'slug': 'reverse-or-rotate',
   'completedLanguages': ['python'],
   'completedAt': '2019-02-15

In [8]:
# Work on a copy of the empty grid so that `df` itself is not modified.
df_intime = df.copy()
df_intime.head()

slug,regexp-fun-number-1-when-i-miss-few-days-of-gym,deodorant-evaporator,ordered-count-of-characters,reverse-or-rotate,the-deaf-rats-of-hamelin,read-the-time,scraping-get-the-year-a-codewarrior-joined,user-class-for-banking-system,moving-average
username,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
paulapr,,,,,,,,,
Almugs,,,,,,,,,
hector-moreno,,,,,,,,,
potacho,,,,,,,,,
carlosmd14,,,,,,,,,


In [9]:
def user_time_kata(user, slug, completed, katas):
    """Look up when ``user`` completed the kata identified by ``slug``.

    Parameters
    ----------
    user : hashable
        Key into ``completed``.
    slug : str
        Kata slug to search for.
    completed : mapping
        Maps each user to a list of dicts that carry at least the keys
        ``'slug'`` and ``'completedAt'``.
    katas : any
        Not used by the lookup; accepted so existing calls keep working.

    Returns
    -------
    str
        The ``'completedAt'`` value of the first entry whose slug matches, or
        the far-future placeholder ``'2090-01-01T00:00:00.000Z'`` when the
        user has no entry for that kata.
    """
    never_completed = '2090-01-01T00:00:00.000Z'
    matching_times = (
        entry['completedAt']
        for entry in completed[user]
        if entry['slug'] == slug
    )
    # `next` stops at the first match, mirroring an early `break`.
    return next(matching_times, never_completed)

# Fill the grid with the completion timestamp (as a string) of every
# student/kata pair.
for user in students:
    for slug in katas:
        df_intime.loc[user, slug] = user_time_kata(user, slug, completed_katas_dict, katas)

# Parse every kata column into datetimes. The deprecated
# `infer_datetime_format` argument is dropped: recent pandas versions warn
# about it and parse consistently formatted strings without it.
for col in katas:
    df_intime[col] = pd.to_datetime(df_intime[col])

df_intime.head()

slug,regexp-fun-number-1-when-i-miss-few-days-of-gym,deodorant-evaporator,ordered-count-of-characters,reverse-or-rotate,the-deaf-rats-of-hamelin,read-the-time,scraping-get-the-year-a-codewarrior-joined,user-class-for-banking-system,moving-average
username,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
paulapr,2019-02-01 16:20:21.241000+00:00,2019-01-31 10:23:30.056000+00:00,2019-02-13 09:11:58.899000+00:00,2019-02-15 14:23:21.428000+00:00,2019-02-19 22:09:27.506000+00:00,2019-02-19 16:39:55.303000+00:00,2019-02-20 16:32:41.854000+00:00,2019-02-25 10:16:29.963000+00:00,2090-01-01 00:00:00+00:00
Almugs,2090-01-01 00:00:00+00:00,2019-01-31 10:07:42.203000+00:00,2019-02-13 09:22:22.716000+00:00,2090-01-01 00:00:00+00:00,2019-02-19 15:43:33.639000+00:00,2019-02-19 16:28:17.996000+00:00,2090-01-01 00:00:00+00:00,2019-02-25 10:03:02.577000+00:00,2090-01-01 00:00:00+00:00
hector-moreno,2019-02-01 09:20:36.608000+00:00,2019-01-31 09:50:30.396000+00:00,2019-02-13 09:32:00.389000+00:00,2019-02-15 14:28:12.748000+00:00,2019-02-19 14:48:02.786000+00:00,2019-02-19 16:55:31.264000+00:00,2019-02-20 16:27:56.302000+00:00,2019-02-25 09:52:24.701000+00:00,2019-02-26 09:45:34.352000+00:00
potacho,2019-02-01 10:20:48.105000+00:00,2019-01-31 09:33:19.252000+00:00,2090-01-01 00:00:00+00:00,2019-02-15 14:36:18.055000+00:00,2019-02-19 16:40:50.287000+00:00,2019-02-19 18:02:56.589000+00:00,2019-02-21 08:41:16.266000+00:00,2090-01-01 00:00:00+00:00,2019-02-26 09:29:59.578000+00:00
carlosmd14,2019-02-01 09:40:03.013000+00:00,2019-01-31 09:15:42.108000+00:00,2019-02-13 09:12:50.216000+00:00,2019-02-15 12:28:29.372000+00:00,2019-02-19 15:54:28.113000+00:00,2019-02-19 15:17:55.685000+00:00,2019-02-20 16:15:00.406000+00:00,2019-02-25 09:20:42.073000+00:00,2019-02-26 09:18:09.770000+00:00


In [10]:


# A kata counts as "in time" when its completion timestamp is strictly earlier
# than that kata's `limit`. The comparison is done for the whole table at once:
# `axis='columns'` matches the labels of `df_katas['limit']` (kata slugs)
# against the columns of `df_intime`. This avoids writing booleans row by row
# into datetime columns, which recent pandas versions reject as an
# incompatible-dtype assignment.
# NOTE: after this cell `df_intime` holds booleans instead of timestamps, so
# the cell that fills in the timestamps must be re-run before running this one
# again.
df_intime = df_intime.lt(df_katas['limit'], axis='columns')
df_intime

slug,regexp-fun-number-1-when-i-miss-few-days-of-gym,deodorant-evaporator,ordered-count-of-characters,reverse-or-rotate,the-deaf-rats-of-hamelin,read-the-time,scraping-get-the-year-a-codewarrior-joined,user-class-for-banking-system,moving-average
username,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
paulapr,True,True,True,False,False,True,True,True,False
Almugs,False,True,True,False,True,True,False,True,False
hector-moreno,True,True,True,False,True,True,True,True,True
potacho,True,True,False,False,True,False,False,False,True
carlosmd14,True,True,True,True,True,True,True,True,True
Livia Canet,True,True,False,False,False,False,False,True,False
AlejandroFrank,True,False,True,False,True,False,True,True,True
AlejandroPerela,True,True,True,False,True,True,True,True,True
leyremarazuela,False,False,False,False,True,False,True,True,False
LauraRepullo,False,False,True,False,False,False,False,False,False


In [11]:
def save_results(df, path='results.csv'):
    """Write ``df`` to ``path`` in CSV format, including its index.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to store.
    path : str
        Destination file; defaults to ``'results.csv'``.
    """
    df.to_csv(path_or_buf=path, index=True)
    
# Store the in-time table using the default output path of `save_results`.
save_results(df_intime)

In [12]:
   
    

def main():
    """Placeholder entry point; it currently performs no work.

    The pipeline below is only a sketch and none of these helpers are
    defined in this notebook yet.
    """
    # TODO: planned pipeline
    #   data = acquire()
    #   filtered = wrangle(data)
    #   results = analyze(filtered)
    #   barchart = visualize(results)
    #   save_viz(barchart)
    return None


if __name__ == '__main__':
    main()

In [22]:
import random 

def choose_presenter(df):
    """Pick at random one row label whose value in the last column is true.

    Parameters
    ----------
    df : pandas.DataFrame
        Boolean table (rows: students, columns: katas). Only the last column
        is inspected.

    Returns
    -------
    object
        An index label of ``df`` chosen uniformly among the rows that are
        true in the last column.

    Raises
    ------
    IndexError
        If no row is true in the last column, so there is nobody to choose.
    """
    last_kata = df.columns[-1]
    # Filter the frame that was passed in; relying on a notebook-level global
    # here would ignore the argument and break the function outside the
    # notebook.
    candidates = list(df[df[last_kata]].index)
    if not candidates:
        raise IndexError('no student completed %r in time' % (last_kata,))
    return random.choice(candidates)

# Draw a presenter. The pick is random and no seed is set in this cell, so the
# printed name can differ between runs.
print(choose_presenter(df_intime))

hector-moreno
