## Working with CSV Data

In [1]:
import pandas as pd
df_data = pd.read_html('''
https://www.basketball-reference.com/leagues/NBA_2015_totals.html
''', skiprows=1)[0]

In [2]:
df_data

Unnamed: 0,1,Quincy Acy,PF,24,NYK,68,22,1287,152,331,...,.784,79,222,301,68.1,27,22.1,60.1,147,398
0,2,Jordan Adams,SG,20,MEM,30,0,248,35,86,...,.609,9,19,28,16,16,7,14,24,94
1,3,Steven Adams,C,21,OKC,70,67,1771,217,399,...,.502,199,324,523,66,38,86,99,222,537
2,4,Jeff Adrien,PF,28,MIN,17,0,215,19,44,...,.579,23,54,77,15,4,9,9,30,60
3,5,Arron Afflalo,SG,29,TOT,78,72,2502,375,884,...,.843,27,220,247,129,41,7,116,167,1035
4,5,Arron Afflalo,SG,29,DEN,53,53,1750,281,657,...,.841,21,159,180,101,32,5,83,108,771
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
669,490,Thaddeus Young,PF,26,TOT,76,68,2434,451,968,...,.655,127,284,411,173,124,25,117,171,1071
670,490,Thaddeus Young,PF,26,MIN,48,48,1605,289,641,...,.682,75,170,245,135,86,17,75,115,685
671,490,Thaddeus Young,PF,26,BRK,28,20,829,162,327,...,.606,52,114,166,38,38,8,42,56,386
672,491,Cody Zeller,C,22,CHO,62,45,1487,172,373,...,.774,97,265,362,100,34,49,62,156,472


In [3]:
df_data.columns

Index(['1', 'Quincy Acy', 'PF', '24', 'NYK', '68', '22', '1287', '152', '331',
       '.459', '18', '60', '.300', '134', '271', '.494', '.486', '76', '97',
       '.784', '79', '222', '301', '68.1', '27', '22.1', '60.1', '147', '398'],
      dtype='object')

In [4]:
df_data.dtypes

1             object
Quincy Acy    object
PF            object
24            object
NYK           object
68            object
22            object
1287          object
152           object
331           object
.459          object
18            object
60            object
.300          object
134           object
271           object
.494          object
.486          object
76            object
97            object
.784          object
79            object
222           object
301           object
68.1          object
27            object
22.1          object
60.1          object
147           object
398           object
dtype: object

### Save this dataset

In [5]:
df_data.to_csv('./NBA.csv')

In [6]:
ls

 cricket.csv   NBA.csv                                   README.ipynb
 final.csv    'Pandas _1_HTML_CSV_JSON_REQUESTS.ipynb'
 NBA_1.csv     personal.csv


In [7]:
ls -lh

total 508K
-rw-rw-r-- 1 fact12 fact12 5.4K Feb 28 10:46  cricket.csv
-rw-rw-r-- 1 fact12 fact12 5.7K Feb 28 10:46  final.csv
-rw-rw-r-- 1 fact12 fact12  77K Feb 28 10:46  NBA_1.csv
-rw-rw-r-- 1 fact12 fact12  79K Feb 28 17:21  NBA.csv
-rw-rw-r-- 1 fact12 fact12 324K Feb 28 10:48 'Pandas _1_HTML_CSV_JSON_REQUESTS.ipynb'
-rw-rw-r-- 1 fact12 fact12  127 Feb 28 10:46  personal.csv
-rw-rw-r-- 1 fact12 fact12  684 Feb 27 10:36  README.ipynb


### Remove Index Column

In [8]:
df_data.to_csv('./NBA_1.csv', index=False)

### Example  - 2

In [9]:
cricket_df = pd.read_html('https://stats.espncricinfo.com/ci/engine/records/averages/batting.html?class=3;id=6;type=team', skiprows=1)[0]

In [10]:
cricket_df

Unnamed: 0,AB Agarkar,2006-2007,4,2,0,15,14,7.50,11,136.36,0.1,0.2,0.3,2.1,0.4
0,KK Ahmed,2018-2019,14,1,1,1,1*,-,1,100.00,0,0,0,0,0
1,S Aravind,2015-2015,1,-,-,-,-,-,-,-,-,-,-,-,-
2,R Ashwin,2010-2021,51,11,7,123,31*,30.75,115,106.95,0,0,0,14,1
3,Avesh Khan,2022-2022,2,-,-,-,-,-,-,-,-,-,-,-,-
4,P Awana,2012-2012,2,-,-,-,-,-,-,-,-,-,-,-,-
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,R Vinay Kumar,2010-2013,9,1,1,2,2*,-,4,50.00,0,0,0,0,0
92,Washington Sundar,2017-2021,31,11,4,47,14*,6.71,32,146.87,0,0,3,4,3
93,SA Yadav,2021-2022,14,12,3,351,65,39.00,212,165.56,0,4,1,32,20
94,UT Yadav,2012-2019,7,1,0,2,2,2.00,4,50.00,0,0,0,0,0


### Store the data to csv file

In [11]:
cricket_df.to_csv('cricket.csv', index=False)

## Working with JSON DATA

In [12]:
data="""
{
  "name": "jissmon jose",
  "email id": "jissmon@gmail.com",
  "tech": [
    "front end",
    "python"
  ],
  "platform": [
    "windows",
    "ubuntu"
  ]
}
"""

In [13]:
data

'\n{\n  "name": "jissmon jose",\n  "email id": "jissmon@gmail.com",\n  "tech": [\n    "front end",\n    "python"\n  ],\n  "platform": [\n    "windows",\n    "ubuntu"\n  ]\n}\n'

### Convert this json data to dictionary using json.loads()

In [14]:
import json

In [15]:
try:
    data_cv = json.loads(data)
    print(data_cv)
except Exception as err:
    print(err.args)

{'name': 'jissmon jose', 'email id': 'jissmon@gmail.com', 'tech': ['front end', 'python'], 'platform': ['windows', 'ubuntu']}


In [16]:
type(data_cv)

dict

In [17]:
data_cv['name']

'jissmon jose'

In [18]:
data_cv['tech']

['front end', 'python']

### Convert this data to dataframe using pd.DataFrame()

In [19]:
pd.DataFrame(data_cv)

Unnamed: 0,name,email id,tech,platform
0,jissmon jose,jissmon@gmail.com,front end,windows
1,jissmon jose,jissmon@gmail.com,python,ubuntu


### Convert this data to Dataframe using read_json

In [20]:
dt_df = pd.read_json(data)

In [21]:
dt_df

Unnamed: 0,name,email id,tech,platform
0,jissmon jose,jissmon@gmail.com,front end,windows
1,jissmon jose,jissmon@gmail.com,python,ubuntu


In [22]:
type(dt_df)

pandas.core.frame.DataFrame

In [23]:
dt_df.columns

Index(['name', 'email id', 'tech', 'platform'], dtype='object')

In [24]:
# Save dataframe to CSV 

In [25]:
dt_df.to_csv('personal.csv')

### Example - 3

In [26]:
data = """
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters": {
  "batter": [
    {
      "id": "1001",
      "type": "Regular"
    },
    {
      "id": "1002",
      "type": "Chocolate"
    },
    {
      "id": "1003",
      "type": "Blueberry"
    },
    {
      "id": "1004",
      "type": "Devil's Food"
    }
  ]
},
"topping": [
  {
    "id": "5001",
    "type": "None"
  },
  {
    "id": "5002",
    "type": "Glazed"
  },
  {
    "id": "5005",
    "type": "Sugar"
  },
  {
    "id": "5007",
    "type": "Powdered Sugar"
  },
  {
    "id": "5006",
    "type": "Chocolate with Sprinkles"
  },
  {
    "id": "5003",
    "type": "Chocolate"
  },
  {
    "id": "5004",
    "type": "Maple"
  }
]
}
"""


In [27]:
data

'\n"id": "0001",\n"type": "donut",\n"name": "Cake",\n"ppu": 0.55,\n"batters": {\n  "batter": [\n    {\n      "id": "1001",\n      "type": "Regular"\n    },\n    {\n      "id": "1002",\n      "type": "Chocolate"\n    },\n    {\n      "id": "1003",\n      "type": "Blueberry"\n    },\n    {\n      "id": "1004",\n      "type": "Devil\'s Food"\n    }\n  ]\n},\n"topping": [\n  {\n    "id": "5001",\n    "type": "None"\n  },\n  {\n    "id": "5002",\n    "type": "Glazed"\n  },\n  {\n    "id": "5005",\n    "type": "Sugar"\n  },\n  {\n    "id": "5007",\n    "type": "Powdered Sugar"\n  },\n  {\n    "id": "5006",\n    "type": "Chocolate with Sprinkles"\n  },\n  {\n    "id": "5003",\n    "type": "Chocolate"\n  },\n  {\n    "id": "5004",\n    "type": "Maple"\n  }\n]\n}\n'

In [28]:
try:
    data = json.loads(data)
    result = pd.DataFrame(data)
    print(result)
except Exception as err:
    print(err.args)

('Extra data: line 2 column 5 (char 5)',)


### Example - 4

In [29]:
data_1 = d = """
{"packetType":"D","data":{"checkEngineLightFlag":"F","batteryVoltageStableTime":0,"batteryVoltageStable":"0","batteryVoltageOff":"12.42","batteryCrankParamTN":"-0.08","batteryCrankParamVN":"0.00","batteryCrankParamTP":"-0.08","batteryCrankParamVP":"0.00","batteryCrankParamTT":"-0.00008","batteryCrankParamV0":"0.00","batteryVoltageMaxOn":"13.05","batteryVoltageMinOn":"12.97","batteryVoltageMaxOff":"12.46","batteryVoltageMinOff":"12.36","batteryVoltageOnAverage":"13.02","engineLoadMax":"84","engineLoadAverage":"39.98","rpmMax":"3487","rpmAverage":"1431.29","gpsSpeedAverage":"21.99","vssMax":"53.44","vssAverage":"23.06","tcuTemperatureMin":"82.40","tcuTemperatureMax":"109.40","tcuTemperatureAverage":"104.87","coolantMin":"158.00","coolantMax":"188.60","coolantAverage":"180.20","packetStartLocal":1508143346000,"tripStartLocal":1508143346000,"milIndicator":"F","monitorsNotReady":0,"imei":"60DF5417","gatewayTs":1515613306592,"diagnosticTroubleCodeData":[],"diagnosticPidData":[[64768,47,100],[64768,1,517376],[64800,1,262144],[64768,5,125]]},"header":{"iwrapVer":"1.9.20","sourceSystem":"CDP","configVer":"1.1","oemName":"HUM","unitType":0,"cpVer":"7.50.1.9","igpsVer":"1.3.7","messageType":"Notification","pomVer":"1.0","headerVer":"V6","timestamp":0,"deviceType":"InDrive","visorVer":"1.4.35","transactionId":"53098471-7787-4160-94b3-cd69dcc70416","deviceSerialNo":"60DF5417","subOrganization":"HUM","organization":"HUM","imei":"60DF5417","operation":"Notification"}}
"""

In [30]:
data_1

'\n{"packetType":"D","data":{"checkEngineLightFlag":"F","batteryVoltageStableTime":0,"batteryVoltageStable":"0","batteryVoltageOff":"12.42","batteryCrankParamTN":"-0.08","batteryCrankParamVN":"0.00","batteryCrankParamTP":"-0.08","batteryCrankParamVP":"0.00","batteryCrankParamTT":"-0.00008","batteryCrankParamV0":"0.00","batteryVoltageMaxOn":"13.05","batteryVoltageMinOn":"12.97","batteryVoltageMaxOff":"12.46","batteryVoltageMinOff":"12.36","batteryVoltageOnAverage":"13.02","engineLoadMax":"84","engineLoadAverage":"39.98","rpmMax":"3487","rpmAverage":"1431.29","gpsSpeedAverage":"21.99","vssMax":"53.44","vssAverage":"23.06","tcuTemperatureMin":"82.40","tcuTemperatureMax":"109.40","tcuTemperatureAverage":"104.87","coolantMin":"158.00","coolantMax":"188.60","coolantAverage":"180.20","packetStartLocal":1508143346000,"tripStartLocal":1508143346000,"milIndicator":"F","monitorsNotReady":0,"imei":"60DF5417","gatewayTs":1515613306592,"diagnosticTroubleCodeData":[],"diagnosticPidData":[[64768,47,10

In [31]:
result_dt = json.loads(data_1)

In [32]:
pd.DataFrame(result_dt)

Unnamed: 0,packetType,data,header
checkEngineLightFlag,D,F,
batteryVoltageStableTime,D,0,
batteryVoltageStable,D,0,
batteryVoltageOff,D,12.42,
batteryCrankParamTN,D,-0.08,
batteryCrankParamVN,D,0.00,
batteryCrankParamTP,D,-0.08,
batteryCrankParamVP,D,0.00,
batteryCrankParamTT,D,-0.00008,
batteryCrankParamV0,D,0.00,


## Extracting JSON Data

In [33]:
github_data = pd.read_json('https://api.github.com/repos/pandas-dev/pandas/issues')

In [34]:
github_data.head()

Unnamed: 0,url,repository_url,labels_url,comments_url,events_url,html_url,id,node_id,number,title,...,updated_at,closed_at,author_association,active_lock_reason,body,reactions,timeline_url,performed_via_github_app,draft,pull_request
0,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://github.com/pandas-dev/pandas/issues/46178,1153888271,I_kwDOAA0YD85ExvAP,46178,BUG: Merge error when merge col is int64 and I...,...,2022-02-28 10:11:45+00:00,NaT,NONE,,### Pandas version checks\r\n\r\n- [X] I have ...,{'url': 'https://api.github.com/repos/pandas-d...,https://api.github.com/repos/pandas-dev/pandas...,,,
1,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://github.com/pandas-dev/pandas/pull/46177,1153704239,PR_kwDOAA0YD84zpQlY,46177,DOC: Move general utility functions to better ...,...,2022-02-28 06:22:11+00:00,NaT,MEMBER,,- [x] All [code checks passed](https://pandas....,{'url': 'https://api.github.com/repos/pandas-d...,https://api.github.com/repos/pandas-dev/pandas...,,0.0,{'url': 'https://api.github.com/repos/pandas-d...
2,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://github.com/pandas-dev/pandas/issues/46176,1153565414,I_kwDOAA0YD85EwgLm,46176,getsizeof usage for memory utilization estima...,...,2022-02-28 03:08:49+00:00,NaT,NONE,,### Pandas version checks\n\n- [X] I have chec...,{'url': 'https://api.github.com/repos/pandas-d...,https://api.github.com/repos/pandas-dev/pandas...,,,
3,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://github.com/pandas-dev/pandas/pull/46175,1153564804,PR_kwDOAA0YD84zoyGW,46175,PERF: avoid cast in algos.rank,...,2022-02-28 03:08:02+00:00,NaT,MEMBER,,The real goal here is allowing us to use numer...,{'url': 'https://api.github.com/repos/pandas-d...,https://api.github.com/repos/pandas-dev/pandas...,,0.0,{'url': 'https://api.github.com/repos/pandas-d...
4,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://api.github.com/repos/pandas-dev/pandas...,https://github.com/pandas-dev/pandas/pull/46174,1153468610,PR_kwDOAA0YD84zodR3,46174,PERF: faster corrwith method for pearson and s...,...,2022-02-28 05:45:16+00:00,NaT,NONE,,## Description of Changes\r\n\r\nThis PR modif...,{'url': 'https://api.github.com/repos/pandas-d...,https://api.github.com/repos/pandas-dev/pandas...,,0.0,{'url': 'https://api.github.com/repos/pandas-d...


In [35]:
github_data.columns

Index(['url', 'repository_url', 'labels_url', 'comments_url', 'events_url',
       'html_url', 'id', 'node_id', 'number', 'title', 'user', 'labels',
       'state', 'locked', 'assignee', 'assignees', 'milestone', 'comments',
       'created_at', 'updated_at', 'closed_at', 'author_association',
       'active_lock_reason', 'body', 'reactions', 'timeline_url',
       'performed_via_github_app', 'draft', 'pull_request'],
      dtype='object')

## Using Request Method

In [36]:
import requests
data = requests.get('https://api.github.com/repos/pandas-dev/pandas/issues')
data

<Response [200]>

In [37]:
data_1 = data.json()

In [38]:
data_1

[{'url': 'https://api.github.com/repos/pandas-dev/pandas/issues/46178',
  'repository_url': 'https://api.github.com/repos/pandas-dev/pandas',
  'labels_url': 'https://api.github.com/repos/pandas-dev/pandas/issues/46178/labels{/name}',
  'comments_url': 'https://api.github.com/repos/pandas-dev/pandas/issues/46178/comments',
  'events_url': 'https://api.github.com/repos/pandas-dev/pandas/issues/46178/events',
  'html_url': 'https://github.com/pandas-dev/pandas/issues/46178',
  'id': 1153888271,
  'node_id': 'I_kwDOAA0YD85ExvAP',
  'number': 46178,
  'title': 'BUG: Merge error when merge col is int64 and Int64',
  'user': {'login': 'tritemio',
   'id': 4156237,
   'node_id': 'MDQ6VXNlcjQxNTYyMzc=',
   'avatar_url': 'https://avatars.githubusercontent.com/u/4156237?v=4',
   'gravatar_id': '',
   'url': 'https://api.github.com/users/tritemio',
   'html_url': 'https://github.com/tritemio',
   'followers_url': 'https://api.github.com/users/tritemio/followers',
   'following_url': 'https://api.

In [39]:
# get user id from each dictionary

In [40]:
extract_id = []
for ids in range(len(data_1)):
    user_id = data_1[ids]['user']['id']    
    extract_id.append(user_id)
print(extract_id)

[4156237, 10647082, 100502242, 8078968, 10540830, 10297014, 8078968, 15113894, 10647082, 36619387, 8011761, 8519523, 36619387, 8519523, 4667801, 7980666, 8179126, 6145801, 10647082, 45396231, 36619387, 89028359, 10712109, 10712109, 36071676, 7346303, 294943, 3236794, 12810626, 93594841]


In [41]:
# Show Repository url, comment url from data

In [42]:
data_new = pd.DataFrame(data_1, columns=['repository_url', 'labels_url', 'comments_url'])

In [43]:
data_new.to_csv('final.csv')

### Task 1

### create columns based on keys available on users dictionary.

In [44]:
github_data['user'][1]

{'login': 'mroeschke',
 'id': 10647082,
 'node_id': 'MDQ6VXNlcjEwNjQ3MDgy',
 'avatar_url': 'https://avatars.githubusercontent.com/u/10647082?v=4',
 'gravatar_id': '',
 'url': 'https://api.github.com/users/mroeschke',
 'html_url': 'https://github.com/mroeschke',
 'followers_url': 'https://api.github.com/users/mroeschke/followers',
 'following_url': 'https://api.github.com/users/mroeschke/following{/other_user}',
 'gists_url': 'https://api.github.com/users/mroeschke/gists{/gist_id}',
 'starred_url': 'https://api.github.com/users/mroeschke/starred{/owner}{/repo}',
 'subscriptions_url': 'https://api.github.com/users/mroeschke/subscriptions',
 'organizations_url': 'https://api.github.com/users/mroeschke/orgs',
 'repos_url': 'https://api.github.com/users/mroeschke/repos',
 'events_url': 'https://api.github.com/users/mroeschke/events{/privacy}',
 'received_events_url': 'https://api.github.com/users/mroeschke/received_events',
 'type': 'User',
 'site_admin': False}

In [45]:
users_frame = pd.DataFrame(github_data, columns=['user'])
users_frame

Unnamed: 0,user
0,"{'login': 'tritemio', 'id': 4156237, 'node_id'..."
1,"{'login': 'mroeschke', 'id': 10647082, 'node_i..."
2,"{'login': 'mach881040', 'id': 100502242, 'node..."
3,"{'login': 'jbrockmendel', 'id': 8078968, 'node..."
4,"{'login': 'fractionalhare', 'id': 10540830, 'n..."
5,"{'login': 'macsakow', 'id': 10297014, 'node_id..."
6,"{'login': 'jbrockmendel', 'id': 8078968, 'node..."
7,"{'login': 'Dr-Irv', 'id': 15113894, 'node_id':..."
8,"{'login': 'mroeschke', 'id': 10647082, 'node_i..."
9,"{'login': 'weikhor', 'id': 36619387, 'node_id'..."


In [73]:
def collect_data():    
    global logins = []
    global ids = []
    global node_ids = []
    global avatar_urls = []
    global gravatar_ids = []
    global urls = []
    global html_urls= []
    for info in users_frame.values:
        logins.append(info[0]['login'])
        ids.append(info[0]['id'])
        node_ids.append(info[0]['node_id'])
        avatar_urls.append(info[0]['avatar_url'])
        gravatar_ids.append(info[0]['gravatar_id'])
        urls.append(info[0]['url'])
        html_urls.append(info[0]['html_url'])
    print(html_urls)

In [74]:
collect_data()

['https://github.com/tritemio', 'https://github.com/mroeschke', 'https://github.com/mach881040', 'https://github.com/jbrockmendel', 'https://github.com/fractionalhare', 'https://github.com/macsakow', 'https://github.com/jbrockmendel', 'https://github.com/Dr-Irv', 'https://github.com/mroeschke', 'https://github.com/weikhor', 'https://github.com/J0', 'https://github.com/lukemanley', 'https://github.com/weikhor', 'https://github.com/lukemanley', 'https://github.com/milotoor', 'https://github.com/tehunter', 'https://github.com/jukiewiczm', 'https://github.com/harahu', 'https://github.com/mroeschke', 'https://github.com/vnlitvinov', 'https://github.com/weikhor', 'https://github.com/timmy-ops', 'https://github.com/mvashishtha', 'https://github.com/mvashishtha', 'https://github.com/McToel', 'https://github.com/timlod', 'https://github.com/Sleepingwell', 'https://github.com/smarie', 'https://github.com/benjamrio', 'https://github.com/alanhowells']


In [81]:
# add key value pairs to userd_dict dictionary
def data_to_dictionary():
    global users_dict
    users_dict = {}
    users_dict['login'] = logins
    users_dict['ids'] = ids
    users_dict['node_ids'] = node_ids
    users_dict['avatar_urls'] = avatar_urls
    users_dict['gravatar_ids'] = gravatar_ids
    users_dict['urls'] = urls
    users_dict['html_urls'] = html_urls
    print(users_dict)
data_to_dictionary()

{'login': ['tritemio', 'mroeschke', 'mach881040', 'jbrockmendel', 'fractionalhare', 'macsakow', 'jbrockmendel', 'Dr-Irv', 'mroeschke', 'weikhor', 'J0', 'lukemanley', 'weikhor', 'lukemanley', 'milotoor', 'tehunter', 'jukiewiczm', 'harahu', 'mroeschke', 'vnlitvinov', 'weikhor', 'timmy-ops', 'mvashishtha', 'mvashishtha', 'McToel', 'timlod', 'Sleepingwell', 'smarie', 'benjamrio', 'alanhowells'], 'ids': [4156237, 10647082, 100502242, 8078968, 10540830, 10297014, 8078968, 15113894, 10647082, 36619387, 8011761, 8519523, 36619387, 8519523, 4667801, 7980666, 8179126, 6145801, 10647082, 45396231, 36619387, 89028359, 10712109, 10712109, 36071676, 7346303, 294943, 3236794, 12810626, 93594841], 'node_ids': ['MDQ6VXNlcjQxNTYyMzc=', 'MDQ6VXNlcjEwNjQ3MDgy', 'U_kgDOBf2K4g', 'MDQ6VXNlcjgwNzg5Njg=', 'MDQ6VXNlcjEwNTQwODMw', 'MDQ6VXNlcjEwMjk3MDE0', 'MDQ6VXNlcjgwNzg5Njg=', 'MDQ6VXNlcjE1MTEzODk0', 'MDQ6VXNlcjEwNjQ3MDgy', 'MDQ6VXNlcjM2NjE5Mzg3', 'MDQ6VXNlcjgwMTE3NjE=', 'MDQ6VXNlcjg1MTk1MjM=', 'MDQ6VXNlcjM2NjE

In [83]:
# Create a dataframe from users dictionary created above
users_df = pd.DataFrame(users_dict)
users_df

Unnamed: 0,login,ids,node_ids,avatar_urls,gravatar_ids,urls,html_urls
0,tritemio,4156237,MDQ6VXNlcjQxNTYyMzc=,https://avatars.githubusercontent.com/u/415623...,,https://api.github.com/users/tritemio,https://github.com/tritemio
1,mroeschke,10647082,MDQ6VXNlcjEwNjQ3MDgy,https://avatars.githubusercontent.com/u/106470...,,https://api.github.com/users/mroeschke,https://github.com/mroeschke
2,mach881040,100502242,U_kgDOBf2K4g,https://avatars.githubusercontent.com/u/100502...,,https://api.github.com/users/mach881040,https://github.com/mach881040
3,jbrockmendel,8078968,MDQ6VXNlcjgwNzg5Njg=,https://avatars.githubusercontent.com/u/807896...,,https://api.github.com/users/jbrockmendel,https://github.com/jbrockmendel
4,fractionalhare,10540830,MDQ6VXNlcjEwNTQwODMw,https://avatars.githubusercontent.com/u/105408...,,https://api.github.com/users/fractionalhare,https://github.com/fractionalhare
5,macsakow,10297014,MDQ6VXNlcjEwMjk3MDE0,https://avatars.githubusercontent.com/u/102970...,,https://api.github.com/users/macsakow,https://github.com/macsakow
6,jbrockmendel,8078968,MDQ6VXNlcjgwNzg5Njg=,https://avatars.githubusercontent.com/u/807896...,,https://api.github.com/users/jbrockmendel,https://github.com/jbrockmendel
7,Dr-Irv,15113894,MDQ6VXNlcjE1MTEzODk0,https://avatars.githubusercontent.com/u/151138...,,https://api.github.com/users/Dr-Irv,https://github.com/Dr-Irv
8,mroeschke,10647082,MDQ6VXNlcjEwNjQ3MDgy,https://avatars.githubusercontent.com/u/106470...,,https://api.github.com/users/mroeschke,https://github.com/mroeschke
9,weikhor,36619387,MDQ6VXNlcjM2NjE5Mzg3,https://avatars.githubusercontent.com/u/366193...,,https://api.github.com/users/weikhor,https://github.com/weikhor


In [64]:
logins = []
ids = []
node_ids = []
for info in users_frame.values:
    print(info[0])
    print(len(info))
    
print(logins)

{'login': 'tritemio', 'id': 4156237, 'node_id': 'MDQ6VXNlcjQxNTYyMzc=', 'avatar_url': 'https://avatars.githubusercontent.com/u/4156237?v=4', 'gravatar_id': '', 'url': 'https://api.github.com/users/tritemio', 'html_url': 'https://github.com/tritemio', 'followers_url': 'https://api.github.com/users/tritemio/followers', 'following_url': 'https://api.github.com/users/tritemio/following{/other_user}', 'gists_url': 'https://api.github.com/users/tritemio/gists{/gist_id}', 'starred_url': 'https://api.github.com/users/tritemio/starred{/owner}{/repo}', 'subscriptions_url': 'https://api.github.com/users/tritemio/subscriptions', 'organizations_url': 'https://api.github.com/users/tritemio/orgs', 'repos_url': 'https://api.github.com/users/tritemio/repos', 'events_url': 'https://api.github.com/users/tritemio/events{/privacy}', 'received_events_url': 'https://api.github.com/users/tritemio/received_events', 'type': 'User', 'site_admin': False}
1
{'login': 'mroeschke', 'id': 10647082, 'node_id': 'MDQ6VX

In [65]:
sample_dt = {
    'id': [8901, 9022, 9022],
    'name': ['jiss', 'justin', 'abin']
}

In [66]:
df= pd.DataFrame(sample_dt)

Unnamed: 0,id,name
0,8901,jiss
1,9022,justin
2,9022,abin
