# Proyecto Final IH: Recomendador de Codewars|Github

In [None]:
# https://www.codewars.com/users/leaderboard

In [1]:
# imports 

import pandas as pd
import numpy as np
import requests
import json
from bs4 import BeautifulSoup
from IPython.display import display
import time

pd.options.display.max_columns = None

## Fase 1: Obtención de usuarios de forma iterativa

### Leaderboard
Top 500 de codewars

In [2]:
# URL

url = 'https://www.codewars.com/users/leaderboard'
html = requests.get(url).content
soup = BeautifulSoup(html, "lxml")

In [3]:
# usuarios de leaderboard
def get_top_500(url='https://www.codewars.com/users/leaderboard'): 
    html = requests.get(url).content
    soup = BeautifulSoup(html, "lxml")
    return {e.text for e in soup.select('tr a')}


In [4]:
# Salvamos los nombres en un documento

def save_set_users(users, filename='../output/codewars-users.txt'): 
    ''' Save set/list of users to file '''
    with open(filename, 'w') as f: 
        f.write('\n'.join(users)) 
# save_set_users({'hola', 'adios'})

In [5]:
def save_string_users(users, filename='../output/codewars-users.txt'): 
    ''' Save string of users to file '''
    with open(filename, 'w') as f: 
        f.write(users) 
# save_set_users(users)

In [6]:
def add_set_users(users, filename='../output/codewars-users.txt'): 
    ''' Append to file '''
    with open(filename, 'a') as f: 
        f.write(users) 
# add_set_users(users)

In [7]:
def load_set_users(filename='../output/codewars-users.txt'): 
    ''' Read from file '''
    with open(filename, 'r') as f: 
        users = f.readlines()
    return {user.strip() for user in users}
# len(load_set_users())

### Get Social

In [8]:
# get users in clan

# https://www.codewars.com/users/albertogcmr/following
# https://www.codewars.com/users/albertogcmr/followers
# https://www.codewars.com/users/albertogcmr/allies

def get_social(user): 
    res = set()
    for link in ['following', 'followers', 'allies']: 
        url = 'https://www.codewars.com/users/{}/{}'.format(user, link)
        html = requests.get(url).content

        soup = BeautifulSoup(html, "lxml")
        res = res.union({e.text for e in soup.select('table a')})
    return res

# get_social('albertogcmr')

In [9]:
# cálculo de tiempos: 

def timeit(method):
    def timed(*args, **kw):
        ts = time.time()
        result = method(*args, **kw)
        te = time.time()

        if 'log_time' in kw:
            name = kw.get('log_name', method.__name__.upper())
            kw['log_time'][name] = int((te - ts) * 1000)
        else:
            print('%r  %2.2f ms' %  (method.__name__, (te - ts) * 1000) )
        return result

    return timed

@timeit
def print_prueba(a, b):
    print(a, b)
    
print_prueba('hola', 'adios')

hola adios
'print_prueba'  0.26 ms


### Iteración para ampliar número de usuarios

A partir de la semilla inicial de 500 obtenemos casi 4000 usuarios de codewars

In [11]:
top500 = get_top_500()
total = set().union(top500)
len(total)

499

In [None]:

@timeit
def get_social_from_users_set(users_set): 
    res = users_set
    errors = []
    for i, user in enumerate(users_set): 
        try: 
            res = res.union(get_social(user))
        except: 
            errors.append(user)
            
    save_set_users(users=errors, filename='../errors/social-error.txt')
    return res

#

total = get_social_from_users_set(total)

In [12]:

len(total)

499

In [13]:
# Me añado a mí mismo

total = total.union({'albertogcmr', 'boyander'})
for user in get_social('albertogcmr'): 
    total = total.union(get_social(user))
len(total)

534

### Persistencia de datos en un txt

In [14]:
# save_set_users(users=total, filename='../output/usuarioscodewars-3947.txt')
total = load_set_users('../output/usuarioscodewars-3947.txt')

In [15]:
len(total)

3947

## Fase 2: Obtención de datos de usuario
### 2.1 API codewars

In [16]:
# GET user

def get_user_api(user='albertogcmr'): 
    ''' Obtenemos el json de la API de codewars '''
    url = 'https://www.codewars.com/api/v1/users/{}'.format(user)
    response = requests.get(url)
    return response.json()

# get_user_api('albertogcmr')

In [17]:
def get_value_from_json(user_json, value='username'): 
    ''' 
    value en nivel 1 del json: 
    possible values = ['username', 'honor', 'clan', 'leaderboardPosition', 'skills']
    '''
    return user_json[value]

def get_score_language(user_json, language): 
    try: 
        score = user_json['ranks']['languages'][language]['score']
    except: 
        score = 0
    return score


def get_scores(user_json): 
    res = {}
    for lang in get_languages(): 
        res[lang] = get_score_language(user_json, lang)
    return res

# get_score_language(user_json=alberto, language='sca')
# get_value_from_json(alberto, 'skills')
# get_scores(alberto)

### 2.2 Web Scrapping codewars

Get stats from user

In [18]:
def get_all_stats(user): 
    url = 'https://www.codewars.com/users/{}'.format(user)
    html = requests.get(url).content
    soup = BeautifulSoup(html, "lxml")

    x = {}
    x['username'] = user
    for s in soup.select('.stat-box div'): 
        if s.text.split(':')[0] != 'Profiles': 
            x[s.text.split(':')[0].lower()] = s.text.split(':')[1]
        else: 
            try: 
                for e in s.find_all('a', href=True): 
                    if 'github' in e['href']: 
                        x['github'] = e['href']
                    if 'linkedin' in e['href']: 
                        x['linkedin'] = e['href']
            except: 
                x['github'] = ''
                x['linkedin'] = ''
    return x

# get_all_stats('albertogcmr')

In [19]:
def get_languages(url='https://www.codewars.com/kata/latest/my-languages', ignore={'all', 'my languages'}): 
    html = requests.get(url).content
    soup = BeautifulSoup(html, "lxml")
    return {e.text.lower() for e in soup.select('#language_filter option')}.difference(ignore) 

languages = get_languages()
len(languages)

38

In [20]:
def get_row(user): 
    ''' 
    Crea un diccionario con los datos recopilados que se puede
    añadir como fila en nuestro dataframe de pandas
    '''
    user_json = get_user_api(user)
    res = get_all_stats(user)
    res.update(get_scores(user_json))
    return res

def create_row(data, new_user): 
    row = get_row(new_user)
    return data.append(other=row, ignore_index=True)

# ivan = get_row('ijcernicharo')
# alberto = get_row('albertogcmr')

### 2.3 Creación de DataFrame

In [25]:
# Por cada uno de los usuarios recopilados, creamos un registro en df_users
total_n = [u for u in total][:]
len(total_n)

3947

In [24]:
@timeit
def create_df(users): 
    # print(users)
    df_res = pd.DataFrame()
    errors = []
    for i, user in enumerate(users): 
        try: 
            print(i, '\t', user)
            df_res = create_row(data=df_res, new_user=user)
        except: 
            errors.append(user)
    try: 
        save_set_users(users=errors, filename='../errors/df-row-error.txt')
    except: 
        print('Error en la escritura del archivo de error')
        print(errors)
    
    return df_res

In [26]:
df_users = create_df(total_n)
df_users.head()

# 'create_df'  9796075.92 ms

0 	 mr.tk
1 	 Caffeinatedbrew
2 	 rscharfer
3 	 tommur
4 	 caparezza
5 	 Neverlucky_One
6 	 NiteKnight
7 	 DestyNova
8 	 servantofgrace
9 	 alexcasper
10 	 Sweeper
11 	 pinglinh
12 	 Oleksiy1984
13 	 Antman.kruger
14 	 Jon.I
15 	 nwebz
16 	 lustrzanydotyk
17 	 hakatom
18 	 to-gh
19 	 Matumizuro
20 	 theisler
21 	 tweimer
22 	 Freywar
23 	 nickabbott303
24 	 p.kokkinos
25 	 pythonista101
26 	 jayeshcp
27 	 Barri
28 	 zair
29 	 MaikelNabil
30 	 C_Minor
31 	 GT37
32 	 MGO
33 	 mansyfan
34 	 RookieBart
35 	 Poppy-flower
36 	 meredithmatthews
37 	 AWice
38 	 አወል እሸቱ
39 	 nha
40 	 guzzler13
41 	 vvisoy
42 	 Diogen
43 	 Blank01
44 	 samupl
45 	 kevinrlutz
46 	 dtchbrn
47 	 gayanw
48 	 ThabangMohale
49 	 davethewiz
50 	 KEO
51 	 ArnoldPalmer
52 	 qnikst
53 	 pearlsortman
54 	 NyxTo
55 	 wekempf
56 	 amanuel2
57 	 ryan.ivan.dsouza
58 	 Torkel
59 	 ericwfink
60 	 fighterleslie
61 	 dgs
62 	 yMk4
63 	 ahnkeelee
64 	 Evgen123
65 	 Shawn Zhang
66 	 ptolemybarnes
67 	 alextv9
68 	 ctma
69 	 codewars

527 	 dramforever
528 	 beltranbot
529 	 Kmacpher
530 	 iROOT
531 	 swarog46
532 	 brianmarete
533 	 AStoe87
534 	 shanrui
535 	 Jeksonic
536 	 MarkChanner
537 	 Trizeps
538 	 ZhmAA
539 	 xihe
540 	 skillup
541 	 jazzhands
542 	 FlxMstr
543 	 alyssapatinostudent
544 	 replay
545 	 error2407
546 	 tonkhao
547 	 JustyFY
548 	 nagendra226
549 	 kakato10
550 	 eb110
551 	 frantioz
552 	 hondras
553 	 noHands
554 	 Akomer
555 	 alumbang
556 	 hannahw
557 	 carlyminjoy
558 	 Lg9ud
559 	 Ciwan
560 	 Koliuchiy
561 	 CodingWarrior
562 	 tdowek1
563 	 marwin45
564 	 lorddicki
565 	 qiao
566 	 allenmklam
567 	 omarvg_tech
568 	 gnanendra
569 	 OlegVetn
570 	 Podnimatel_pingvinov
571 	 erikgolden7
572 	 beluysanya
573 	 Svmurvj
574 	 rkdmen
575 	 mtparagon5
576 	 dplain
577 	 pedro.sampaio
578 	 raulbc777
579 	 smiks
580 	 haayhappen
581 	 vanhukset
582 	 Em-Ant
583 	 BloodyIce
584 	 pH77
585 	 ShubhamAkolkar
586 	 starms
587 	 rfonseca
588 	 the_fiXer
589 	 JohanJs
590 	 narabadziyska
591 	 taleh

1045 	 dcsmith
1046 	 negrolio
1047 	 MarcoCode
1048 	 babydragon
1049 	 nullie
1050 	 uie
1051 	 shotgunner
1052 	 scottgourley
1053 	 H1ro
1054 	 ducdongmg
1055 	 masterseeker
1056 	 surajkj
1057 	 wardymate
1058 	 rallison
1059 	 mzychaco
1060 	 peaceDM
1061 	 staticor
1062 	 gokhanozg
1063 	 kclaw99
1064 	 mythlee
1065 	 loronk
1066 	 michaelsclayton
1067 	 mmarinero
1068 	 joolius
1069 	 schmlooshed
1070 	 Havok187
1071 	 Essiar
1072 	 vgerak
1073 	 BunjiRo
1074 	 frankleton
1075 	 Panddha
1076 	 gthielon
1077 	 AliaksandrVarachai
1078 	 AnwarShah
1079 	 maximus1984
1080 	 andreasr27
1081 	 danielemanuel
1082 	 lilc4t
1083 	 swatis
1084 	 aamir7117
1085 	 mapineda
1086 	 just_yves
1087 	 glennverschooren
1088 	 alexecook
1089 	 bahalps
1090 	 Tophan
1091 	 hladik_dan
1092 	 UkioIkira
1093 	 LouieInSeattle
1094 	 erfbound
1095 	 wibblymat
1096 	 Manfredi
1097 	 Ben100-07
1098 	 hyacz
1099 	 shankarsridhar
1100 	 xrisk
1101 	 brkiesel
1102 	 Piotr_Drozdz
1103 	 beaucarnes
1104 	 gar

1535 	 chmelevskij
1536 	 chrisd19
1537 	 docgunthrop
1538 	 ButuzovDima
1539 	 greenfrog82
1540 	 elliewilliamspython
1541 	 πthon
1542 	 nazgob
1543 	 soldier-b
1544 	 emporio
1545 	 liviu_e
1546 	 nmgtav
1547 	 bside
1548 	 abhishek2k49
1549 	 missKatiaPunter
1550 	 akoptsov
1551 	 !untrue
1552 	 lanjiann
1553 	 sorcerer-ma
1554 	 danoneata
1555 	 e.mihaylin
1556 	 jansona
1557 	 iv2101
1558 	 XavierBoubert
1559 	 boatmeme
1560 	 Vikke
1561 	 dagolinuxoid
1562 	 gmcnickle
1563 	 mekano
1564 	 mmalam
1565 	 chris_steenekamp
1566 	 maround95
1567 	 Lubcdtyb
1568 	 sawamura
1569 	 Dmitry_S25
1570 	 moustafa186
1571 	 migueloruiz
1572 	 blockedUser
1573 	 gary.zhi
1574 	 GoyoGranja
1575 	 soulomoon
1576 	 costas11635
1577 	 jordanator
1578 	 cloworm
1579 	 lewis500
1580 	 ALYIP
1581 	 AshC
1582 	 noahwaterfieldprice
1583 	 mattglover
1584 	 sohail.shaghasi@gmail.com
1585 	 Infinite Pest
1586 	 stoned
1587 	 ownwaterloo
1588 	 Alex-Swann
1589 	 Baeksang
1590 	 Kyle S.
1591 	 monners
1592

2015 	 friesam
2016 	 honya
2017 	 madhurima-kandadai
2018 	 kelaiyarao1
2019 	 webmisfit
2020 	 z708808033
2021 	 coffeeismyblood
2022 	 cryptic
2023 	 hugh.samson
2024 	 mromero
2025 	 alex.husky
2026 	 athasach
2027 	 MJeorrett
2028 	 Deantwo
2029 	 marty-wang
2030 	 danielOcandoGL
2031 	 ezNNP
2032 	 headcr4sh
2033 	 HKGTreeLeaf
2034 	 janvanaa
2035 	 mitanac
2036 	 jestanoff
2037 	 mjwivell
2038 	 SGMartineau
2039 	 wolf9248
2040 	 ashtonkellis
2041 	 jeduan
2042 	 kayahr
2043 	 Joshb
2044 	 jerome4537
2045 	 nick1256
2046 	 xXxGingerxXx
2047 	 shaileshKumar
2048 	 brunolm
2049 	 -SKJ
2050 	 mi5hu
2051 	 AcesOfGlory
2052 	 coluble
2053 	 bladez
2054 	 Toni-Toni
2055 	 nicholas-zeiss
2056 	 ke1echi
2057 	 JMurphyWeb
2058 	 simplydallas
2059 	 HB85
2060 	 friedhelmensch
2061 	 jmhiggins42
2062 	 pergk
2063 	 nameni
2064 	 prosto-artem
2065 	 AshCoolman
2066 	 naomik
2067 	 abcdabcd987
2068 	 srockk
2069 	 Cuss
2070 	 Minidouf
2071 	 citrinite
2072 	 victorg1991
2073 	 gabrielsiedler

2506 	 PumpkinSeed
2507 	 qw4u
2508 	 deadCat
2509 	 grezzonico
2510 	 CodeRookie
2511 	 JorgeVS
2512 	 Urmemoris
2513 	 d1k1y
2514 	 Warloader
2515 	 Rafal_Kielbowicz
2516 	 nimajneb
2517 	 azumelzu
2518 	 bonowg
2519 	 dhritimaandas
2520 	 sunwayihep
2521 	 Aquila
2522 	 owade
2523 	 dangle
2524 	 bwblock
2525 	 atmosfeer
2526 	 Kiln
2527 	 StasDeep
2528 	 caratang
2529 	 Devilek66
2530 	 cubittz
2531 	 ale1ster
2532 	 RemyG
2533 	 Asamsig
2534 	 GKotfis
2535 	 donflopez
2536 	 Unicornelia
2537 	 eweil505
2538 	 Shawdotion
2539 	 CrazyMerlyn
2540 	 mscarey
2541 	 knife-maniac
2542 	 szsdk
2543 	 samkcdev
2544 	 pyinciple
2545 	 d3x42
2546 	 inssein
2547 	 C3realGuy
2548 	 Jakubziak
2549 	 silverHub
2550 	 GeekG1rl
2551 	 yurain29
2552 	 Fishkiller
2553 	 DawnAngel
2554 	 mehmke
2555 	 cave.on
2556 	 AnasT
2557 	 ealessi
2558 	 XxxX88
2559 	 rmprescott
2560 	 Eliv
2561 	 njohnson7
2562 	 SeanGoku11
2563 	 Yuliya_Prach
2564 	 .flo
2565 	 kurideja
2566 	 lencavin
2567 	 dcieslak
2568 	 

2999 	 seanjohn56
3000 	 julienroyer
3001 	 lukehwang
3002 	 MindWanderer
3003 	 febbyoktaviani
3004 	 jistjoalal
3005 	 samjam48
3006 	 xahigoyez
3007 	 xxxxpal
3008 	 zsolti1202
3009 	 EmilyAnn
3010 	 linxnp
3011 	 cvk77
3012 	 TaricTaric
3013 	 nicoh9
3014 	 AngryBot
3015 	 xDranik
3016 	 gkucmierz
3017 	 tmatijev
3018 	 Ban-Ath
3019 	 panthros
3020 	 soosiey
3021 	 NOvlCE
3022 	 wolfgarnet
3023 	 gurkanoluc
3024 	 metalim
3025 	 Rancho
3026 	 kibbalat
3027 	 AlexeySt
3028 	 dmivlge
3029 	 albertogcmr
3030 	 Werclues
3031 	 jqbdpgb
3032 	 elreyatee
3033 	 juanmatg
3034 	 vladkha
3035 	 gabimancini
3036 	 code.monkey
3037 	 faradey
3038 	 Xenon1840
3039 	 anamarg
3040 	 BattleRattle
3041 	 silencepy
3042 	 GitJonibek
3043 	 obrok
3044 	 Kirill23
3045 	 jonspieg
3046 	 J4CODE
3047 	 Choupinne
3048 	 yell1229
3049 	 thebradness
3050 	 JamieSK
3051 	 morrisk
3052 	 mbozhkova
3053 	 csano
3054 	 petrarch1603
3055 	 GregSebastian
3056 	 shadowx99
3057 	 sajadtorkamani
3058 	 BarHanSolo
30

3487 	 maketroli
3488 	 ChaahatJain
3489 	 timp
3490 	 esc2345
3491 	 6030999
3492 	 NHagner
3493 	 AGenevray
3494 	 anuid08
3495 	 Nanaya7
3496 	 Camachof
3497 	 kari
3498 	 lightcast
3499 	 csaden
3500 	 Valefar
3501 	 codemaster24
3502 	 PatrickO10
3503 	 RPS
3504 	 cnak
3505 	 pre1ude
3506 	 ExtinctLink
3507 	 minktom
3508 	 MysteriousMagenta
3509 	 d8aninja
3510 	 stok
3511 	 bouchert
3512 	 chuckwondo
3513 	 Hacker Sakana
3514 	 joe barnett
3515 	 veitsi
3516 	 atmortensen
3517 	 Valeriy4k
3518 	 Saul-Mirone
3519 	 Inbarhub
3520 	 apatryda
3521 	 JoshBrodieNZ
3522 	 gabs22
3523 	 michal8888
3524 	 ZakharAntoschenko
3525 	 NateBrady23
3526 	 supun123
3527 	 Laurynas Lazauskas
3528 	 PaviRaghav
3529 	 ssaitta13
3530 	 blinker345678
3531 	 pro2501
3532 	 Jammalovec
3533 	 Nelkey
3534 	 Osa-sergey
3535 	 aiglebleu
3536 	 hogan721
3537 	 pbhuvanesh
3538 	 Kamus04
3539 	 cirops
3540 	 morgoth
3541 	 ArashFrontend
3542 	 xPrzybyLx
3543 	 AlertRED
3544 	 ckarras11
3545 	 ducgiang
3546 	 

Unnamed: 0,allies,bf (beta),c,c#,c++,clan,clojure,coffeescript,collections,comments,crystal,dart,elixir,elm (beta),erlang (beta),f#,followers,following,fortran (beta),go,groovy (beta),haskell,highest trained,honor,honor percentile,java,javascript,julia (beta),kotlin (beta),kumite,last seen,leaderboard position,lua (beta),member since,most recent,name,nasm (beta),nim (beta),objective-c (beta),ocaml (beta),php,powershell (beta),purescript (beta),python,r (beta),rank,ruby,rust,scala (beta),shell,solidity (beta),sql,swift,total completed kata,total languages trained,translations,typescript,username,github,skills,4 kyu,5 kyu,6 kyu,7 kyu,8 kyu,best practice,best practice solutions,clever,clever solutions,completed kata,completed on 1st attempt,current streak,date,ended on,first completed,kata attempts,last completed,most number of days,avg. satisfaction rating,created,data types,puzzles,strings,total collected,total completions,total stars,linkedin,advanced language features,contributed kata,declarative programming,fundamentals,programming paradigms,regular expressions,avg. rank,utilities,data,databases,information systems,algorithms,character encodings,game boards,games,graphs,kata approvals,logic,mathematics,numbers,django,frameworks,algebra,geometry,arrays,basic language features,control flow,functions,metaprogramming,parsing,formats,prototypes,sparse arrays,babel,design principles,es2015,react,decryption,security,binary,binary search trees,computability theory,recursion,theoretical computer science,bits,bitwise operators,dates/time,functional programming,classes,3 kyu,authored kata,authored translations,search,data structures,object-oriented programming,variables,loops,lists,objects,arithmetic,applied computer science,machine learning,1 kyu,formatting,interpreters,2 kyu,sorting,optimization,data conversion,esoteric languages,angular,performance,integers,permutations,unicode,bugs,nodejs,queues,decoding,exception handling,sequences,statistics,networks,vectors,ascii,operators,hashes,map/reduce,refactoring,iterators,sets,cryptography,encryption,arguments,hacking holidays,filtering,trees,maps,design patterns,event handling,dynamic programming,rules,decimals,big integers,encoding,tables,physics,modules,best practices,conditional statements,observers,lambdas,computational science,state machines,inheritance,polymorphism,booleans,graphics,higher-order functions,immutability,mutability,linked lists,ranking,io,streams,expressions,properties,json,closures,reflection,dictionary,validation,reporting,ciphers
0,1,0.0,0.0,0.0,0.0,Unknown,0.0,0.0,0,2 (0 replies),0.0,0.0,0.0,0.0,0.0,0.0,2,1,0.0,0.0,0.0,0.0,JavaScript (5 kyu),376,94th,0.0,428.0,0.0,0.0,0,Feb 2017,"#33,216",0.0,Dec 2015,JavaScript,Unknown,0.0,0.0,0.0,0.0,50.0,0.0,0.0,0.0,0.0,5 kyu,0.0,0.0,0.0,0.0,0.0,0.0,0.0,35,2,0 (0 approved),0.0,mr.tk,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,47,0.0,0.0,0.0,0.0,Unknown,0.0,16.0,0,1 (0 replies),0.0,0.0,0.0,0.0,0.0,0.0,48,48,0.0,0.0,0.0,0.0,CoffeeScript (8 kyu),49,65th,8.0,0.0,0.0,0.0,0,May 2017,"#223,367",0.0,Aug 2016,Java,Theresa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7 kyu,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11,3,0 (0 approved),0.0,Caffeinatedbrew,https://github.com/Caffeinatedbrew,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,1,0.0,0.0,0.0,0.0,Unknown,0.0,0.0,0,39 (21 replies),0.0,0.0,0.0,0.0,0.0,0.0,1,12,0.0,0.0,0.0,0.0,JavaScript (2 kyu),2089,99th,0.0,5071.0,0.0,0.0,0,Aug 2018,"#2,293",0.0,Dec 2016,NASM,Ryan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2 kyu,0.0,0.0,0.0,0.0,0.0,0.0,0.0,82,1,0 (0 approved),0.0,rscharfer,,"javascript, react js, html5",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,0,0.0,0.0,0.0,0.0,Object AI,0.0,0.0,0,8 (5 replies),0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,Python (1 kyu),7051,99th,0.0,21.0,0.0,0.0,8 (9 Started),Dec 2018,#318,0.0,Feb 2018,Python,Unknown,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20110.0,0.0,1 kyu,0.0,0.0,0.0,0.0,0.0,0.0,0.0,315,2,0 (0 approved),0.0,tommur,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,770,0.0,0.0,0.0,0.0,UEK,0.0,0.0,0,3 (3 replies),0.0,0.0,0.0,0.0,0.0,0.0,770,770,0.0,0.0,0.0,0.0,JavaScript (3 kyu),2265,99th,2.0,2801.0,0.0,0.0,1 (1 Started),Dec 2018,"#2,035",0.0,Oct 2017,Java,Jakub Podolski,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1646.0,0.0,3 kyu,0.0,0.0,0.0,0.0,0.0,0.0,0.0,208,3,0 (0 approved),0.0,caparezza,https://github.com/jakubpodolski,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [28]:
df_users = create_row(data=df_users, new_user='ijcernicharo')
df_users = create_row(data=df_users, new_user='pablobarrio')
df_users.head()

Unnamed: 0,allies,bf (beta),c,c#,c++,clan,clojure,coffeescript,collections,comments,crystal,dart,elixir,elm (beta),erlang (beta),f#,followers,following,fortran (beta),go,groovy (beta),haskell,highest trained,honor,honor percentile,java,javascript,julia (beta),kotlin (beta),kumite,last seen,leaderboard position,lua (beta),member since,most recent,name,nasm (beta),nim (beta),objective-c (beta),ocaml (beta),php,powershell (beta),purescript (beta),python,r (beta),rank,ruby,rust,scala (beta),shell,solidity (beta),sql,swift,total completed kata,total languages trained,translations,typescript,username,github,skills,4 kyu,5 kyu,6 kyu,7 kyu,8 kyu,best practice,best practice solutions,clever,clever solutions,completed kata,completed on 1st attempt,current streak,date,ended on,first completed,kata attempts,last completed,most number of days,avg. satisfaction rating,created,data types,puzzles,strings,total collected,total completions,total stars,linkedin,advanced language features,contributed kata,declarative programming,fundamentals,programming paradigms,regular expressions,avg. rank,utilities,data,databases,information systems,algorithms,character encodings,game boards,games,graphs,kata approvals,logic,mathematics,numbers,django,frameworks,algebra,geometry,arrays,basic language features,control flow,functions,metaprogramming,parsing,formats,prototypes,sparse arrays,babel,design principles,es2015,react,decryption,security,binary,binary search trees,computability theory,recursion,theoretical computer science,bits,bitwise operators,dates/time,functional programming,classes,3 kyu,authored kata,authored translations,search,data structures,object-oriented programming,variables,loops,lists,objects,arithmetic,applied computer science,machine learning,1 kyu,formatting,interpreters,2 kyu,sorting,optimization,data conversion,esoteric languages,angular,performance,integers,permutations,unicode,bugs,nodejs,queues,decoding,exception handling,sequences,statistics,networks,vectors,ascii,operators,hashes,map/reduce,refactoring,iterators,sets,cryptography,encryption,arguments,hacking holidays,filtering,trees,maps,design patterns,event handling,dynamic programming,rules,decimals,big integers,encoding,tables,physics,modules,best practices,conditional statements,observers,lambdas,computational science,state machines,inheritance,polymorphism,booleans,graphics,higher-order functions,immutability,mutability,linked lists,ranking,io,streams,expressions,properties,json,closures,reflection,dictionary,validation,reporting,ciphers
0,1,0.0,0.0,0.0,0.0,Unknown,0.0,0.0,0,2 (0 replies),0.0,0.0,0.0,0.0,0.0,0.0,2,1,0.0,0.0,0.0,0.0,JavaScript (5 kyu),376,94th,0.0,428.0,0.0,0.0,0,Feb 2017,"#33,216",0.0,Dec 2015,JavaScript,Unknown,0.0,0.0,0.0,0.0,50.0,0.0,0.0,0.0,0.0,5 kyu,0.0,0.0,0.0,0.0,0.0,0.0,0.0,35,2,0 (0 approved),0.0,mr.tk,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,47,0.0,0.0,0.0,0.0,Unknown,0.0,16.0,0,1 (0 replies),0.0,0.0,0.0,0.0,0.0,0.0,48,48,0.0,0.0,0.0,0.0,CoffeeScript (8 kyu),49,65th,8.0,0.0,0.0,0.0,0,May 2017,"#223,367",0.0,Aug 2016,Java,Theresa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7 kyu,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11,3,0 (0 approved),0.0,Caffeinatedbrew,https://github.com/Caffeinatedbrew,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,1,0.0,0.0,0.0,0.0,Unknown,0.0,0.0,0,39 (21 replies),0.0,0.0,0.0,0.0,0.0,0.0,1,12,0.0,0.0,0.0,0.0,JavaScript (2 kyu),2089,99th,0.0,5071.0,0.0,0.0,0,Aug 2018,"#2,293",0.0,Dec 2016,NASM,Ryan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2 kyu,0.0,0.0,0.0,0.0,0.0,0.0,0.0,82,1,0 (0 approved),0.0,rscharfer,,"javascript, react js, html5",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,0,0.0,0.0,0.0,0.0,Object AI,0.0,0.0,0,8 (5 replies),0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,Python (1 kyu),7051,99th,0.0,21.0,0.0,0.0,8 (9 Started),Dec 2018,#318,0.0,Feb 2018,Python,Unknown,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20110.0,0.0,1 kyu,0.0,0.0,0.0,0.0,0.0,0.0,0.0,315,2,0 (0 approved),0.0,tommur,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,770,0.0,0.0,0.0,0.0,UEK,0.0,0.0,0,3 (3 replies),0.0,0.0,0.0,0.0,0.0,0.0,770,770,0.0,0.0,0.0,0.0,JavaScript (3 kyu),2265,99th,2.0,2801.0,0.0,0.0,1 (1 Started),Dec 2018,"#2,035",0.0,Oct 2017,Java,Jakub Podolski,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1646.0,0.0,3 kyu,0.0,0.0,0.0,0.0,0.0,0.0,0.0,208,3,0 (0 approved),0.0,caparezza,https://github.com/jakubpodolski,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [43]:
df_users.shape

(3939, 221)

In [44]:
df_users.to_csv('../output/df-codewars-2929x221.csv')
df = pd.read_csv('../output/df-codewars-2929x221.csv')

In [None]:
# 'linkedin' in df_users.columns

In [49]:
'pablobarrio' in list(df.username)

True

In [50]:
df.games.value_counts()


1.0      27
2.0      10
3.0       8
5.0       3
16.0      2
6.0       2
35.0      1
674.0     1
4.0       1
8.0       1
14.0      1
9.0       1
12.0      1
11.0      1
30.0      1
Name: games, dtype: int64

### 2.4 Limpieza de datos

Tenemos más de 100 columnas de las que sólo queremos las concernientes a sus datos de programación

In [None]:
# Nos vamos a quedar con las siguientes columnas
get_languages()
allies, clan, comments, followers, following, github, honor, last seen, 
leaderboard position # eliminar el #
member since, total completed kata, total languages trained, translations, username, avg. satisfaction rating, 
contributed kata, created, data structures, data types, fundamentals, graphs, kata approvals, 
total collected, total completions, total stars, skills, linkedin


## Fase 3: Persistencia de datos

In [None]:
Guarda