In [6]:
# Dependencies
import requests
import json
from pprint import pprint
import pandas as pd
import numpy as np

In [7]:
# Host: pplapi.com
# ----------------

# Sample from Agents in a Country (JSON):
# --------------------------------------

# Select a quantity of agents from the given country at random, and return the results as a JSON file.

# GET /batch/(int: quantity)/country/(string: country_code)/sample.json

# Query Parameters:
 
# quantity – an integer > 0 and ≤ 500
# country_code – an ISO 3166 country code

# ISO 3166-1 alpha-2 codes are two-letter country codes defined in ISO 3166-1, part of the ISO 3166 standard published
# by the International Organization for Standardization (ISO), to represent countries, dependent territories, and special
# areas of geographical interest. They are the most widely used of the country codes published by ISO (the others being
# alpha-3 and numeric), and are used most prominently for the Internet's country code top-level domains (with a few
# exceptions).[1] They are also used as country identifiers extending the postal code when appropriate within the
# international postal system for paper mail, and has replaced the previous one consisting one-letter codes. They were
# first included as part of the ISO 3166 standard in its first edition in 1974.

# US	United States of America	1974	.us	ISO 3166-2:US

# Example 1:
    
# In this example, sample a quantity of 2 agents from Canada. The ISO 3166 country code for Canada is “ca” so that
# is added to the URL.

# http://pplapi.com/batch/2/country/ca/sample.json

# Example request:

# GET /batch/2/country/ca/sample.json

# Example 2:
    
# Get a sample of 5 agents from a Country

# http://pplapi.com/batch/5/country/ca/sample.json

In [8]:
# Format the URL
country = "us"
num_of_samples = 5
url = "http://pplapi.com/batch/"
query_url = f"{url}{num_of_samples}/country/{country}/sample.json"
print(f"The query URL is {query_url}")

The query URL is http://pplapi.com/batch/5/country/us/sample.json


In [9]:
# Make the API call/request
print(requests.get(query_url))  
print()

# Retrieving the data
data = requests.get(query_url).json()
pprint(data)
print(f"Response Type: {type(data)}")
# print(f"Response Length: {len(data)}")

<Response [200]>

[{'age': 56,
  'agreeableness': 0.5609318401419694,
  'conscientiousness': -0.04869555696979684,
  'country_name': 'United States',
  'country_tld': 'us',
  'date_of_birth': '1962-08-24',
  'extraversion': -0.01487176442813303,
  'id': 2875571253,
  'id_str': '9ab-FZJ',
  'income': 48263,
  'internet': True,
  'language': 'English',
  'latitude': 36.169535489339815,
  'longitude': -95.25150926754074,
  'neuroticism': 0.6715326059566435,
  'openness': -0.5852817728486519,
  'religion': 'unaffiliated',
  'sex': 'Female'},
 {'age': 98,
  'agreeableness': 1.5095364355658156,
  'conscientiousness': 0.7916371870512597,
  'country_name': 'United States',
  'country_tld': 'us',
  'date_of_birth': '1920-12-24',
  'extraversion': 0.8952459769458732,
  'id': 2685632646,
  'id_str': 'LI0-OTt',
  'income': 44770,
  'internet': True,
  'language': 'English',
  'latitude': 35.56136246978717,
  'longitude': -100.42074882858587,
  'neuroticism': -0.43852242762006227,
  'openness': 0.1

In [10]:
# Determine the most prevalent religion for our sample of users
###############################################################

religion_list = []
# print(data[0]["religion"]) # For testing purposes only

for x in range(num_of_samples):
#     print(f"Processing sample # {x+1}") # For testing purposes only
    religion_list.append(data[x]["religion"])

print(f"The list of religions from our sample of {num_of_samples} is:")
print(f"----------------------------------------------")
print()
for rel in religion_list:
    print(rel)

The list of religions from our sample of 5 is:
----------------------------------------------

unaffiliated
Protestant
Protestant
Roman Catholic
Roman Catholic


In [41]:
religions_df = pd.DataFrame(religion_list)
religions_df["count"] = religions_df[0].value_counts()
print("religions_df:")
print("-------------")
print(religions_df)
print()

# rel_grouped = religions_df.groupby([0])
# rel_count = rel_grouped[0].count()
# print(rel_count)

# rel_count_df = pd.DataFrame()
# rel_count_df["count"] = rel_count
# # rel_count_df["religion"] = rel_count_df[:, 0]
# rel_count_df

# rel_count = religions_df[0].value_counts(normalize=False, sort=True, ascending=False)
# print()
# print(f"The count of religions (sorted by count) in our sample of {num_of_samples} is:")
# print(f"---------------------------------------------------------------")
# print(rel_count)
# print()

# rel_count_df = pd.DataFrame(rel_count)
# print("rel_count (df):")
# print("---------------")
# print(rel_count_df)

religions_df:
-------------
                0  count
0    unaffiliated    NaN
1      Protestant    NaN
2      Protestant    NaN
3  Roman Catholic    NaN
4  Roman Catholic    NaN



In [12]:
# Determine the average agreeableness for our sample of users
#############################################################

agree_list = []
# print(data[0]["agreeableness"]) # For testing purposes only

for y in range(num_of_samples):
#     print(f"Processing sample # {x+1}") # For testing purposes only
    agree_list.append(data[y]["agreeableness"])

print(f"The list of agreeablenesses from our sample of {num_of_samples} is:")
print(f"----------------------------------------------------")
print()
for w in agree_list:
    print(w)
print()
avg_agree = np.mean(agree_list)
avg_agree = format(avg_agree, '.2f')
print(f"The average agreeableness of our sample of {num_of_samples} is {avg_agree}")

The list of agreeablenesses from our sample of 5 is:
----------------------------------------------------

0.5609318401419694
1.5095364355658156
-0.14710678494889518
0.8984886839776756
-1.4797460798211517

The average agreeableness of our sample of 5 is 0.27


In [13]:
# Calculate the range of neuroticism for your set of users
##########################################################

neuro_list = []
# print(data[0]["neuroticism"]) # For testing purposes only

for z in range(num_of_samples):
#     print(f"Processing sample # {x+1}") # For testing purposes only
    neuro_list.append(data[z]["neuroticism"])

print(f"The list of neuroticism from our sample of {num_of_samples} is:")
print(f"------------------------------------------------")
print()
for xx in neuro_list:
    print(xx)
print()

# Calculate the range of neuroticism
neuro_df = pd.DataFrame(neuro_list)
# print(neuro_df) # For testing purposes only
min_neuro = neuro_df.min()
max_neuro = neuro_df.max()
neuro_range = list(max_neuro - min_neuro)
neuro_range
for x1 in min_neuro:
    min = format(x1, ',.2f')
for x2 in max_neuro:
    max = format(x2, ',.2f')    
for x3 in neuro_range:
    range = format(x3, ',.2f')
    print(f"The range of neuroticism from our sample of {num_of_samples} is {range} (from {min} to {max})")

The list of neuroticism from our sample of 5 is:
------------------------------------------------

0.6715326059566435
-0.43852242762006227
1.0037621737183722
-0.7247104660002234
1.3682781426081478

The range of neuroticism from our sample of 5 is 2.09 (from -0.72 to 1.37)


In [14]:
# Calculate the ages of each one in our sample of 5 and determine the range of ages
###################################################################################

dob_list = []
# print(data[0]["date_of_birth"]) # For testing purposes only

for numsamp in range(num_of_samples):
#     print(f"Processing sample # {x+1}") # For testing purposes only
    dob_list.append(data[numsamp]["date_of_birth"])

print(f"The list of birth dates from our sample of {num_of_samples} is:")
print(f"------------------------------------------------")
print()
for dob in dob_list:
    print(dob)
print()

TypeError: 'str' object is not callable