In [1]:
# Dependencies
import requests
import json
from pprint import pprint
import pandas as pd
import numpy as np
import datetime

In [2]:
# Host: pplapi.com
# ----------------

# Sample from Agents in a Country (JSON):
# --------------------------------------

# Select a quantity of agents from the given country at random, and return the results as a JSON file.

# GET /batch/(int: quantity)/country/(string: country_code)/sample.json

# Query Parameters:
 
# quantity – an integer > 0 and ≤ 500
# country_code – an ISO 3166 country code

# ISO 3166-1 alpha-2 codes are two-letter country codes defined in ISO 3166-1, part of the ISO 3166 standard published
# by the International Organization for Standardization (ISO), to represent countries, dependent territories, and special
# areas of geographical interest. They are the most widely used of the country codes published by ISO (the others being
# alpha-3 and numeric), and are used most prominently for the Internet's country code top-level domains (with a few
# exceptions).[1] They are also used as country identifiers extending the postal code when appropriate within the
# international postal system for paper mail, and has replaced the previous one consisting one-letter codes. They were
# first included as part of the ISO 3166 standard in its first edition in 1974.

# US	United States of America	1974	.us	ISO 3166-2:US

# Example 1:
    
# In this example, sample a quantity of 2 agents from Canada. The ISO 3166 country code for Canada is “ca” so that
# is added to the URL.

# http://pplapi.com/batch/2/country/ca/sample.json

# Example request:

# GET /batch/2/country/ca/sample.json

# Example 2:
    
# Get a sample of 5 agents from a Country

# http://pplapi.com/batch/5/country/ca/sample.json

In [3]:
# Format the URL
country = "us"
num_of_samples = 5
url = "http://pplapi.com/batch/"
query_url = f"{url}{num_of_samples}/country/{country}/sample.json"
print(f"The query URL is {query_url}")

The query URL is http://pplapi.com/batch/5/country/us/sample.json


In [4]:
# Make the API call/request
print(requests.get(query_url))  
print()

# Retrieving the data
data = requests.get(query_url).json()
pprint(data)
print(f"Response Type: {type(data)}")
# print(f"Response Length: {len(data)}")

<Response [200]>

[{'age': 9,
  'agreeableness': 1.2025860118348053,
  'conscientiousness': -0.004109723437627969,
  'country_name': 'United States',
  'country_tld': 'us',
  'date_of_birth': '2009-12-01',
  'extraversion': -0.18139516880555667,
  'id': 2661463106,
  'id_str': 'DF8-5MI',
  'income': 27245,
  'internet': True,
  'language': 'English',
  'latitude': 37.28873812302055,
  'longitude': -97.78608677435732,
  'neuroticism': -1.192728508654757,
  'openness': -1.4521329064105104,
  'religion': 'Roman Catholic',
  'sex': 'Female'},
 {'age': 51,
  'agreeableness': 0.8448236618490276,
  'conscientiousness': 1.1316367020701534,
  'country_name': 'United States',
  'country_tld': 'us',
  'date_of_birth': '1967-11-22',
  'extraversion': 0.616068994469981,
  'id': 2773500116,
  'id_str': 'ynd-mN9',
  'income': 56468,
  'internet': True,
  'language': 'English',
  'latitude': 34.34837327707986,
  'longitude': -103.24486180955772,
  'neuroticism': -0.23644730838507694,
  'openness': 0.4

In [5]:
# Determine the most prevalent religion for our sample of users
###############################################################

religion_list = []
# print(data[0]["religion"]) # For testing purposes only

for x in range(num_of_samples):
#     print(f"Processing sample # {x+1}") # For testing purposes only
    religion_list.append(data[x]["religion"])

print(f"The list of religions from our sample of {num_of_samples} is:")
print(f"----------------------------------------------")
print()
for rel in religion_list:
    print(rel)

The list of religions from our sample of 5 is:
----------------------------------------------

Roman Catholic
Protestant
Roman Catholic
unaffiliated
Roman Catholic


In [6]:
religions_df = pd.DataFrame(religion_list)
print(religions_df)

rel_count = religions_df[0].value_counts(normalize=False, sort=True, ascending=False)
print()
print(f"The count of religions in our sample of {num_of_samples} is:")
print(rel_count)

rel_count_df = pd.DataFrame(rel_count)
rel_count_df

                0
0  Roman Catholic
1      Protestant
2  Roman Catholic
3    unaffiliated
4  Roman Catholic

The count of religions in our sample of 5 is:
Roman Catholic    3
Protestant        1
unaffiliated      1
Name: 0, dtype: int64


Unnamed: 0,0
Roman Catholic,3
Protestant,1
unaffiliated,1


In [7]:
# Determine the average agreeableness for our sample of users
#############################################################

agree_list = []
# print(data[0]["agreeableness"]) # For testing purposes only

for y in range(num_of_samples):
#     print(f"Processing sample # {x+1}") # For testing purposes only
    agree_list.append(data[y]["agreeableness"])

print(f"The list of agreeablenesses from our sample of {num_of_samples} is:")
print(f"----------------------------------------------------")
print()
for w in agree_list:
    print(w)
print()
avg_agree = np.mean(agree_list)
avg_agree = format(avg_agree, '.2f')
print(f"The average agreeableness of our sample of {num_of_samples} is {avg_agree}")

The list of agreeablenesses from our sample of 5 is:
----------------------------------------------------

1.2025860118348053
0.8448236618490276
0.24551850831114605
0.23767226194571064
0.1856123733401878

The average agreeableness of our sample of 5 is 0.54


In [8]:
# Calculate the range of neuroticism for your set of users
##########################################################

neuro_list = []
# print(data[0]["neuroticism"]) # For testing purposes only

for z in range(num_of_samples):
#     print(f"Processing sample # {x+1}") # For testing purposes only
    neuro_list.append(data[z]["neuroticism"])

print(f"The list of neuroticism from our sample of {num_of_samples} is:")
print(f"------------------------------------------------")
print()
for xx in neuro_list:
    print(xx)
print()

# Calculate the range of neuroticism
neuro_df = pd.DataFrame(neuro_list)
# print(neuro_df) # For testing purposes only
min_neuro = neuro_df.min()
max_neuro = neuro_df.max()
neuro_range = list(max_neuro - min_neuro)
neuro_range
for x1 in min_neuro:
    min = format(x1, ',.2f')
for x2 in max_neuro:
    max = format(x2, ',.2f')    
for x3 in neuro_range:
    range = format(x3, ',.2f')
    print(f"The range of neuroticism from our sample of {num_of_samples} is {range} (from {min} to {max})")

The list of neuroticism from our sample of 5 is:
------------------------------------------------

-1.192728508654757
-0.23644730838507694
-0.46812698629692184
1.2474882187919765
0.1349148486915648

The range of neuroticism from our sample of 5 is 2.44 (from -1.19 to 1.25)


In [9]:
# Calculate the ages of each one in our sample of 5 and determine the range of ages
###################################################################################

# Put all dates of birth for our sample in a list/data frame

dob_list = []
# print(data[0]["date_of_birth"]) # For testing purposes only

for quantity in range(num_of_samples):
#    print(f"Processing sample # {x+1}") # For testing purposes only
    dob_list.append(data[quantity]["date_of_birth"])

print(f"The list of birth dates from our sample of {num_of_samples} is:")
print(f"------------------------------------------------")
print()
for dob in dob_list:
    print(dob)

TypeError: 'str' object is not callable

In [10]:
# Get today's date

now = datetime.datetime.now()
today = f"{now.year}-{now.month}-{now.day}"
print(f"Today is {today}")
today_year = today[0:4]
print(f"Today is year {today_year}")
today_month = today[5:-3]
print(f"Today is month {today_month}")
today_day = today[8:]
print(f"Today is day {today_day}")
today_year = pd.to_numeric(today_year)
today_month = pd.to_numeric(today_month)
today_day = pd.to_numeric(today_day)

Today is 2018-12-26
Today is year 2018
Today is month 12
Today is day 26


In [None]:
# Calculate our sample's ages and put them in a list/data frame

ages_list = []
for sampdob in dob_list:
    birth_year = sampdob[0:4]
    birth_month = sampdob[5:-3]
    birth_day = sampdob[8:]
#     print(f"The birth year is {birth_year}")
#     print(f"The birth month is {birth_month}")
#     print(f"The birth day is {birth_day}")
    birth_year = pd.to_numeric(birth_year)
    birth_month = pd.to_numeric(birth_month)
    birth_day = pd.to_numeric(birth_day)
    age_year = today_year - birth_year
    if birth_month >= today_month & birth_day >= today_day:
        age_year += 1
    ages_list.append(age_year)
# print(ages_list)
print(f"The ages of our sample of {num_of_samples} are as follows:")
print()
for ages in ages_list:
    print(ages)

In [None]:
# Calculate the range of ages of our sample

ages_df = pd.DataFrame(ages_list)
# print(neuro_df) # For testing purposes only
min_age = ages_df.min()
max_age = ages_df.max()
age_range = list(max_age - min_age)
# age_range
for x9 in min_age:
    min = format(x9, ',')
for x8 in max_age:
    max = format(x8, ',')    
for x7 in age_range:
    range = format(x7, ',')
    print(f"The range of ages from our sample of {num_of_samples} is {range} (from {min} to {max})")