<a href="https://colab.research.google.com/github/hawc2/wikidata/blob/main/Querying_and_Visualizing_Wikidata.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Querying and Visualizing Wikidata

This notebook walks through using Python to query Wikidata with SPARQL, tabulate the results with pandas, and visualize them with Plotly.

Created by Jay Winkler, Alex Wermer-Colan, Synatra Smith, and Rebecca Bayek

# Installing Libraries

In [None]:
!pip install SPARQLWrapper
%load_ext google.colab.data_table 
import sys
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
import requests
import json
!pip install -U plotly
import plotly.express as px
import numpy as np
import plotly.graph_objects as go

Collecting SPARQLWrapper
  Downloading SPARQLWrapper-1.8.5-py3-none-any.whl (26 kB)
Collecting rdflib>=4.0
  Downloading rdflib-6.1.1-py3-none-any.whl (482 kB)
[K     |████████████████████████████████| 482 kB 10.9 MB/s 
[?25hCollecting isodate
  Downloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)
[K     |████████████████████████████████| 41 kB 448 kB/s 
Installing collected packages: isodate, rdflib, SPARQLWrapper
Successfully installed SPARQLWrapper-1.8.5 isodate-0.6.1 rdflib-6.1.1
Collecting plotly
  Downloading plotly-5.5.0-py2.py3-none-any.whl (26.5 MB)
[K     |████████████████████████████████| 26.5 MB 28.2 MB/s 
Collecting tenacity>=6.2.0
  Downloading tenacity-8.0.1-py3-none-any.whl (24 kB)
Installing collected packages: tenacity, plotly
  Attempting uninstall: plotly
    Found existing installation: plotly 4.4.1
    Uninstalling plotly-4.4.1:
      Successfully uninstalled plotly-4.4.1
Successfully installed plotly-5.5.0 tenacity-8.0.1


Writing my Wikidata Query

In [None]:
# Artists (occupation P106 = Q483501) with a place of birth (P19) that is an instance of
# Q35657 or Q1093829 (or of one of their subclasses), or that is the item Q30 itself.
# The Q30 branch pins ?birthPlace with VALUES: the earlier pattern
# "?birthPlace wdt:P19 wd:Q30" asked for birthplaces that were themselves *born in* Q30.
# Gender, sexual orientation and ethnicity are OPTIONAL, so artists lacking those
# statements are still returned; birthplace labels are joined into one string per row.
sparql.setQuery("""
SELECT
    ?artist ?artistLabel ?sexGenderLabel ?sexualOrientationLabel ?ethnicityLabel
    (group_concat(DISTINCT(?birthPlaceLabel);separator=", ") as ?birthPlaces)
    
WHERE
{
    ?artist wdt:P106 wd:Q483501 .
    ?artist wdt:P19 ?birthPlace .
    { ?birthPlace wdt:P31/wdt:P279* wd:Q35657. } UNION { ?birthPlace wdt:P31/wdt:P279* wd:Q1093829. } UNION { VALUES ?birthPlace { wd:Q30 } }
    OPTIONAL { ?artist wdt:P21 ?sexGender. }
    OPTIONAL { ?artist wdt:P91 ?sexualOrientation. }
    OPTIONAL { ?artist wdt:P172 ?ethnicity. }
    SERVICE wikibase:label { 
    bd:serviceParam wikibase:language "en". 
    ?artist rdfs:label ?artistLabel . 
    ?sexGender rdfs:label ?sexGenderLabel .
    ?birthPlace rdfs:label ?birthPlaceLabel .
    ?sexualOrientation rdfs:label ?sexualOrientationLabel .
    ?ethnicity rdfs:label ?ethnicityLabel .
  }
}
GROUP BY ?artist ?artistLabel ?sexGenderLabel ?sexualOrientationLabel ?ethnicityLabel
ORDER BY ?artistLabel
""")

#This query is designed to capture a few key pieces of information about every artist with a US birthplace. The third line of my where statement asks WDQS to include every artist that is from a US state, a US city, or the United States item itself.

Querying Wikidata, looking at JSON results

In [None]:
# Request JSON from the endpoint, then send the query and decode the response in one call.
sparql.setReturnFormat(JSON)
results = sparql.queryAndConvert()

In [None]:
# Show the decoded JSON response (wrapped in a one-element list) as the cell output,
# to inspect its "head"/"results" structure before flattening it.
[results]

[{'head': {'vars': ['artist',
    'artistLabel',
    'sexGenderLabel',
    'sexualOrientationLabel',
    'ethnicityLabel',
    'birthPlaces']},
  'results': {'bindings': [{'artist': {'type': 'uri',
      'value': 'http://www.wikidata.org/entity/Q16148018'},
     'artistLabel': {'type': 'literal', 'value': '2wenty', 'xml:lang': 'en'},
     'birthPlaces': {'type': 'literal', 'value': 'Los Angeles'},
     'sexGenderLabel': {'type': 'literal', 'value': 'male', 'xml:lang': 'en'}},
    {'artist': {'type': 'uri',
      'value': 'http://www.wikidata.org/entity/Q4647785'},
     'artistLabel': {'type': 'literal',
      'value': 'A. G. Heaton',
      'xml:lang': 'en'},
     'birthPlaces': {'type': 'literal', 'value': 'Philadelphia'},
     'sexGenderLabel': {'type': 'literal', 'value': 'male', 'xml:lang': 'en'}},
    {'artist': {'type': 'uri',
      'value': 'http://www.wikidata.org/entity/Q4648425'},
     'artistLabel': {'type': 'literal',
      'value': 'A. T. Mann',
      'xml:lang': 'en'},
   

In [None]:
# Flatten the list of per-row bindings into a table. Each nested entry such as
# {'artist': {'type': ..., 'value': ...}} becomes dotted columns like "artist.value".
bindings = results['results']['bindings']
data = pd.json_normalize(bindings)

Getting Columns for Dataframe

In [None]:
# Query variables whose ".value" column holds the content we care about.
variables = (
    'artist',
    'artistLabel',
    'sexGenderLabel',
    'sexualOrientationLabel',
    'ethnicityLabel',
    'birthPlaces',
)
cols = [f"{name}.value" for name in variables]

# Preview only those columns (the frame itself is left unchanged).
data.loc[:, cols]

Unnamed: 0,artist.value,artistLabel.value,sexGenderLabel.value,sexualOrientationLabel.value,ethnicityLabel.value,birthPlaces.value
0,http://www.wikidata.org/entity/Q16148018,2wenty,male,,,Los Angeles
1,http://www.wikidata.org/entity/Q4647785,A. G. Heaton,male,,,Philadelphia
2,http://www.wikidata.org/entity/Q4648425,A. T. Mann,male,,,Auburn
3,http://www.wikidata.org/entity/Q28800265,A.K. Burns,female,,,Capitola
4,http://www.wikidata.org/entity/Q16197272,A.L. Steiner,female,,,Miami
...,...,...,...,...,...,...
3713,http://www.wikidata.org/entity/Q105839391,Zoray Andrus,female,,,Alameda
3714,http://www.wikidata.org/entity/Q28864849,Zoë Charlton,female,,,Tallahassee
3715,http://www.wikidata.org/entity/Q55263932,Zulma Steele,female,,,Appleton
3716,http://www.wikidata.org/entity/Q456628,Ámbar Past,female,,,Durham


Stripping QNumber from Wikidata URL, Adding QNumber to Own Column

In [None]:
# The entity URI looks like http://www.wikidata.org/entity/Q16148018, so the QNumber is
# whatever follows the final "/". Taking the last path segment avoids depending on the
# URI having a fixed number of "/"-separated parts.
data["QNum"] = data["artist.value"].str.rsplit("/", n=1).str[-1]
#Flattening the JSON also produced columns that simply display the datatype or language of each value; re-declaring which columns I actually want makes the data a little easier to work with.
new_cols = ['artist.value', 
            'artistLabel.value',
            'sexGenderLabel.value',
            'sexualOrientationLabel.value',
            'ethnicityLabel.value',
            'birthPlaces.value',
            'QNum'
          ]
# .copy() makes the trimmed table an independent frame, so columns added to it later
# are never treated as writes to a slice of the original.
data = data[new_cols].copy()
data

Unnamed: 0,artist.value,artistLabel.value,sexGenderLabel.value,sexualOrientationLabel.value,ethnicityLabel.value,birthPlaces.value,QNum
0,http://www.wikidata.org/entity/Q16148018,2wenty,male,,,Los Angeles,Q16148018
1,http://www.wikidata.org/entity/Q4647785,A. G. Heaton,male,,,Philadelphia,Q4647785
2,http://www.wikidata.org/entity/Q4648425,A. T. Mann,male,,,Auburn,Q4648425
3,http://www.wikidata.org/entity/Q28800265,A.K. Burns,female,,,Capitola,Q28800265
4,http://www.wikidata.org/entity/Q16197272,A.L. Steiner,female,,,Miami,Q16197272
...,...,...,...,...,...,...,...
3713,http://www.wikidata.org/entity/Q105839391,Zoray Andrus,female,,,Alameda,Q105839391
3714,http://www.wikidata.org/entity/Q28864849,Zoë Charlton,female,,,Tallahassee,Q28864849
3715,http://www.wikidata.org/entity/Q55263932,Zulma Steele,female,,,Appleton,Q55263932
3716,http://www.wikidata.org/entity/Q456628,Ámbar Past,female,,,Durham,Q456628


Querying Wikidata API for First Timestamp of Each Artist

In [None]:
S = requests.Session()
# Identify this client to the Wikimedia API instead of sending the default
# python-requests User-Agent.
S.headers.update({
    "User-Agent": "Querying_and_Visualizing_Wikidata notebook (https://github.com/hawc2/wikidata)"
})

URL = "https://www.wikidata.org/w/api.php"


def first_revision_date(qid):
    """Return the date (YYYY-MM-DD) of the oldest revision of the Wikidata page `qid`.

    Parameters
    ----------
    qid : str
        Page title to look up, e.g. "Q16148018".

    Returns
    -------
    str
        The part of the first revision's timestamp that precedes the "T".

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    LookupError
        If the API returns no revisions for the page.
    """
    params = {
        "action": "query",
        "format": "json",
        "prop": "revisions",
        "titles": qid,
        "rvprop": "timestamp",
        "rvlimit": "1",
        # "newer" lists revisions oldest-first, so the single result is the first edit.
        "rvdir": "newer"
    }
    response = S.get(url=URL, params=params, timeout=30)
    response.raise_for_status()

    # A single title was requested, so "pages" holds exactly one entry keyed by page id.
    pages = response.json()["query"]["pages"]
    page = next(iter(pages.values()))
    if "revisions" not in page:
        raise LookupError(f"No revisions returned for {qid}: {page}")

    #Stripping just the date, the query automatically includes the time which I don't really need.
    return page["revisions"][0]["timestamp"].split("T")[0]


# One request per item: the API only accepts rvlimit/rvdir when a single page is queried.
finalDate = [first_revision_date(qid) for qid in data["QNum"]]

data["DateAdded"] = finalDate
data

Unnamed: 0,artist.value,artistLabel.value,sexGenderLabel.value,sexualOrientationLabel.value,ethnicityLabel.value,birthPlaces.value,QNum,DateAdded
0,http://www.wikidata.org/entity/Q16148018,2wenty,male,,,Los Angeles,Q16148018,2014-04-07
1,http://www.wikidata.org/entity/Q4647785,A. G. Heaton,male,,,Philadelphia,Q4647785,2013-02-15
2,http://www.wikidata.org/entity/Q4648425,A. T. Mann,male,,,Auburn,Q4648425,2013-02-15
3,http://www.wikidata.org/entity/Q28800265,A.K. Burns,female,,,Capitola,Q28800265,2017-02-18
4,http://www.wikidata.org/entity/Q16197272,A.L. Steiner,female,,,Miami,Q16197272,2014-04-08
...,...,...,...,...,...,...,...,...
3713,http://www.wikidata.org/entity/Q105839391,Zoray Andrus,female,,,Alameda,Q105839391,2021-03-09
3714,http://www.wikidata.org/entity/Q28864849,Zoë Charlton,female,,,Tallahassee,Q28864849,2017-03-03
3715,http://www.wikidata.org/entity/Q55263932,Zulma Steele,female,,,Appleton,Q55263932,2018-06-29
3716,http://www.wikidata.org/entity/Q456628,Ámbar Past,female,,,Durham,Q456628,2012-12-06


Importing dateutil and retyping the dates column

In [None]:
# Import the parser submodule explicitly: importing only the `dateutil` package does not
# by itself guarantee that `dateutil.parser` is loaded (it presumably worked before
# because another library had already imported it).
import dateutil.parser

# Convert the "YYYY-MM-DD" strings into datetime values so the column sorts and plots
# chronologically. dayfirst=False is forwarded to dateutil.parser.parse by Series.apply.
data['DateAdded'] = data['DateAdded'].apply(dateutil.parser.parse, dayfirst=False)

Adding "African Americans" as a variable by grabbing it directly from a cell in my dataframe, then replacing all NaN entries in my ethnicityLabel.value column with "None"

In [None]:
# Ethnicity label tracked throughout the rest of the notebook. It is written out
# explicitly rather than read from row 10 of the table, because which artist sits in a
# given row changes whenever the query results change.
a_a = "African Americans"

# Artists without an ethnicity statement have NaN here; give them the placeholder "None".
data["ethnicityLabel.value"] = data["ethnicityLabel.value"].fillna("None")
data

Unnamed: 0,artist.value,artistLabel.value,sexGenderLabel.value,sexualOrientationLabel.value,ethnicityLabel.value,birthPlaces.value,QNum,DateAdded
0,http://www.wikidata.org/entity/Q16148018,2wenty,male,,,Los Angeles,Q16148018,2014-04-07
1,http://www.wikidata.org/entity/Q4647785,A. G. Heaton,male,,,Philadelphia,Q4647785,2013-02-15
2,http://www.wikidata.org/entity/Q4648425,A. T. Mann,male,,,Auburn,Q4648425,2013-02-15
3,http://www.wikidata.org/entity/Q28800265,A.K. Burns,female,,,Capitola,Q28800265,2017-02-18
4,http://www.wikidata.org/entity/Q16197272,A.L. Steiner,female,,,Miami,Q16197272,2014-04-08
...,...,...,...,...,...,...,...,...
3713,http://www.wikidata.org/entity/Q105839391,Zoray Andrus,female,,,Alameda,Q105839391,2021-03-09
3714,http://www.wikidata.org/entity/Q28864849,Zoë Charlton,female,,,Tallahassee,Q28864849,2017-03-03
3715,http://www.wikidata.org/entity/Q55263932,Zulma Steele,female,,,Appleton,Q55263932,2018-06-29
3716,http://www.wikidata.org/entity/Q456628,Ámbar Past,female,,,Durham,Q456628,2012-12-06


Adding "None" as a variable, making a list of ethnicities I want to keep, collapsing all others into "Other", updating my dataframe

In [None]:
# Placeholder string used for artists with no ethnicity statement (the value the missing
# entries were filled with). Written out explicitly rather than read from row 225, whose
# content depends on the current query results.
nan = "None"
keep = [a_a, nan]

# Series.where keeps entries where the condition holds and substitutes "Other" elsewhere,
# collapsing every ethnicity outside `keep` into a single category.
data['ethnicityLabel.value'] = data['ethnicityLabel.value'].where(
    data['ethnicityLabel.value'].isin(keep), "Other"
)
data

Unnamed: 0,artist.value,artistLabel.value,sexGenderLabel.value,sexualOrientationLabel.value,ethnicityLabel.value,birthPlaces.value,QNum,DateAdded
0,http://www.wikidata.org/entity/Q16148018,2wenty,male,,,Los Angeles,Q16148018,2014-04-07
1,http://www.wikidata.org/entity/Q4647785,A. G. Heaton,male,,,Philadelphia,Q4647785,2013-02-15
2,http://www.wikidata.org/entity/Q4648425,A. T. Mann,male,,,Auburn,Q4648425,2013-02-15
3,http://www.wikidata.org/entity/Q28800265,A.K. Burns,female,,,Capitola,Q28800265,2017-02-18
4,http://www.wikidata.org/entity/Q16197272,A.L. Steiner,female,,,Miami,Q16197272,2014-04-08
...,...,...,...,...,...,...,...,...
3713,http://www.wikidata.org/entity/Q105839391,Zoray Andrus,female,,,Alameda,Q105839391,2021-03-09
3714,http://www.wikidata.org/entity/Q28864849,Zoë Charlton,female,,,Tallahassee,Q28864849,2017-03-03
3715,http://www.wikidata.org/entity/Q55263932,Zulma Steele,female,,,Appleton,Q55263932,2018-06-29
3716,http://www.wikidata.org/entity/Q456628,Ámbar Past,female,,,Durham,Q456628,2012-12-06


Sorting by date, resetting index

In [None]:

# Order the artists chronologically by the date their item was created, then renumber
# the rows 0..N-1. reset_index() keeps the previous row labels in an "index" column.
data = data.sort_values(by='DateAdded').reset_index()
data

Unnamed: 0,index,artist.value,artistLabel.value,sexGenderLabel.value,sexualOrientationLabel.value,ethnicityLabel.value,birthPlaces.value,QNum,DateAdded
0,1911,http://www.wikidata.org/entity/Q4538,Jonathan Winters,male,,Other,Dayton,Q4538,2012-10-31
1,688,http://www.wikidata.org/entity/Q8372,Craig Barron,male,,,Berkeley,Q8372,2012-11-02
2,1832,http://www.wikidata.org/entity/Q8354,John Frazier,male,,,Richmond,Q8354,2012-11-02
3,62,http://www.wikidata.org/entity/Q8346,Albert Arnold Gillespie,male,,,El Paso,Q8346,2012-11-02
4,2003,http://www.wikidata.org/entity/Q15935,Kanye West,male,,African Americans,Atlanta,Q15935,2012-11-07
...,...,...,...,...,...,...,...,...,...
3713,375,http://www.wikidata.org/entity/Q110228980,Bob Diamond,,,,New York City,Q110228980,2021-12-21
3714,1216,http://www.wikidata.org/entity/Q110220845,Gail Browne,female,,,Lakewood,Q110220845,2021-12-21
3715,148,http://www.wikidata.org/entity/Q110220836,Amy Heller,female,,,"Washington, D.C.",Q110220836,2021-12-21
3716,2305,http://www.wikidata.org/entity/Q110228550,M. C. 5 Cents Jones,,,,Louisiana,Q110228550,2021-12-21


Creating my columns that track percentage change over time

In [None]:
#For every row k (in DateAdded order) I compute the share of the first k artists that fall in each ethnicity category, producing one list of running percentages per category.
eth = data['ethnicityLabel.value']

# 1, 2, ..., N: how many artists have been seen once each row is included.
seen = np.arange(1, len(eth) + 1)

#(eth == label) marks the matching rows; cumsum() turns that into a running count, and dividing by `seen` gives the running share. This replaces re-slicing and re-counting the whole list on every iteration.
a_a_pct = ((eth == a_a).cumsum() / seen).tolist()
other_pct = ((eth == 'Other').cumsum() / seen).tolist()
nan_pct = ((eth == nan).cumsum() / seen).tolist()

[0.0, 0.0, 0.0, 0.0, 0.2, 0.3333333333333333, 0.2857142857142857, 0.25, 0.2222222222222222, 0.2, 0.18181818181818182, 0.16666666666666666, 0.15384615384615385, 0.14285714285714285, 0.13333333333333333, 0.125, 0.11764705882352941, 0.1111111111111111, 0.15789473684210525, 0.15, 0.14285714285714285, 0.13636363636363635, 0.13043478260869565, 0.125, 0.12, 0.11538461538461539, 0.1111111111111111, 0.10714285714285714, 0.10344827586206896, 0.1, 0.0967741935483871, 0.125, 0.12121212121212122, 0.11764705882352941, 0.11428571428571428, 0.1111111111111111, 0.10810810810810811, 0.10526315789473684, 0.10256410256410256, 0.1, 0.0975609756097561, 0.09523809523809523, 0.09302325581395349, 0.09090909090909091, 0.08888888888888889, 0.08695652173913043, 0.0851063829787234, 0.08333333333333333, 0.08163265306122448, 0.08, 0.09803921568627451, 0.09615384615384616, 0.09433962264150944, 0.09259259259259259, 0.09090909090909091, 0.08928571428571429, 0.08771929824561403, 0.08620689655172414, 0.0847457627118644, 

Adding each ethnic group percentage list to my DataFrame

In [None]:
# Attach the three running-share lists as new columns, one value per row.
data = data.assign(
    AfricanAmericanPct=a_a_pct,
    OtherPct=other_pct,
    NonePct=nan_pct,
)
data

Unnamed: 0,index,artist.value,artistLabel.value,sexGenderLabel.value,sexualOrientationLabel.value,ethnicityLabel.value,birthPlaces.value,QNum,DateAdded,AfricanAmericanPct,OtherPct,NonePct
0,1911,http://www.wikidata.org/entity/Q4538,Jonathan Winters,male,,Other,Dayton,Q4538,2012-10-31,0.000000,1.000000,0.000000
1,688,http://www.wikidata.org/entity/Q8372,Craig Barron,male,,,Berkeley,Q8372,2012-11-02,0.000000,0.500000,0.500000
2,1832,http://www.wikidata.org/entity/Q8354,John Frazier,male,,,Richmond,Q8354,2012-11-02,0.000000,0.333333,0.666667
3,62,http://www.wikidata.org/entity/Q8346,Albert Arnold Gillespie,male,,,El Paso,Q8346,2012-11-02,0.000000,0.250000,0.750000
4,2003,http://www.wikidata.org/entity/Q15935,Kanye West,male,,African Americans,Atlanta,Q15935,2012-11-07,0.200000,0.200000,0.600000
...,...,...,...,...,...,...,...,...,...,...,...,...
3713,375,http://www.wikidata.org/entity/Q110228980,Bob Diamond,,,,New York City,Q110228980,2021-12-21,0.049542,0.043619,0.906839
3714,1216,http://www.wikidata.org/entity/Q110220845,Gail Browne,female,,,Lakewood,Q110220845,2021-12-21,0.049529,0.043607,0.906864
3715,148,http://www.wikidata.org/entity/Q110220836,Amy Heller,female,,,"Washington, D.C.",Q110220836,2021-12-21,0.049516,0.043595,0.906889
3716,2305,http://www.wikidata.org/entity/Q110228550,M. C. 5 Cents Jones,,,,Louisiana,Q110228550,2021-12-21,0.049502,0.043584,0.906914


Create the figure of All Artists in Plotly Graphic Objects

In [None]:
# (column, legend label) for each running-share line, in drawing order.
main_traces = [
    ('AfricanAmericanPct', "African American Ethnicity Statement"),
    ('OtherPct', "Non-African American Ethnicity Statement"),
    ('NonePct', "No Ethnicity Statement"),
]

fig_main = go.Figure()
for pct_column, legend_label in main_traces:
    fig_main.add_trace(go.Scatter(
        x=data['DateAdded'],
        y=data[pct_column],
        name=legend_label
    ))

# The y values are fractions; ".1%" renders them as percentages with one decimal.
fig_main.update_layout(title="All Artists",
                       yaxis=dict(tickformat=".1%"))

# Tick marks start at 0.1 and repeat every 0.2.
fig_main.update_yaxes(title="Percentage of Artists",
                      tick0=0.1,
                      dtick=0.2)

fig_main.show()

Creating a new DataFrame that only includes Philadelphia Artists

In [None]:
gritty = ['Philadelphia']
phi_cols = ['artist.value', 
            'artistLabel.value',
            'sexGenderLabel.value',
            'sexualOrientationLabel.value',
            'ethnicityLabel.value',
            'birthPlaces.value',
            'QNum',
            'DateAdded'
          ]
# Select rows and columns in a single .loc call and take an explicit copy, so phi_data is
# an independent frame that can safely receive new columns later.
# NOTE(review): birthPlaces.value is a ", "-joined string of birthplace labels, so this
# exact match skips artists that list more than one birthplace - confirm that is intended.
phi_data = data.loc[data['birthPlaces.value'].isin(gritty), phi_cols].copy()
phi_data

Unnamed: 0,artist.value,artistLabel.value,sexGenderLabel.value,sexualOrientationLabel.value,ethnicityLabel.value,birthPlaces.value,QNum,DateAdded
9,http://www.wikidata.org/entity/Q45111,John R. Neill,male,,,Philadelphia,Q45111,2012-11-13
135,http://www.wikidata.org/entity/Q533275,Edward Percy Moran,male,,,Philadelphia,Q533275,2012-12-07
230,http://www.wikidata.org/entity/Q1328705,Paul Garrin,male,,,Philadelphia,Q1328705,2012-12-20
233,http://www.wikidata.org/entity/Q1364808,William Wallace Denslow,male,,,Philadelphia,Q1364808,2012-12-21
234,http://www.wikidata.org/entity/Q1349603,John Ernest,male,,,Philadelphia,Q1349603,2012-12-21
...,...,...,...,...,...,...,...,...
3599,http://www.wikidata.org/entity/Q108421089,Charles Aaron Pridgen,male,,African Americans,Philadelphia,Q108421089,2021-09-05
3600,http://www.wikidata.org/entity/Q108419969,Columbus Knox,,,,Philadelphia,Q108419969,2021-09-05
3601,http://www.wikidata.org/entity/Q108439127,Edwin Lester,male,,,Philadelphia,Q108439127,2021-09-07
3603,http://www.wikidata.org/entity/Q108498023,Anna Clegg Stryke,female,,,Philadelphia,Q108498023,2021-09-11


Repeating the Process of gathering percentages and adding them to my new DataFrame

In [None]:
# Running share of each ethnicity category among Philadelphia artists, row by row in the
# existing (DateAdded) order.
ph_eth = phi_data['ethnicityLabel.value']

# 1, 2, ..., N: how many Philadelphia artists have been seen once each row is included.
ph_seen = np.arange(1, len(ph_eth) + 1)

# Running count of matching rows divided by rows seen so far; this replaces re-slicing
# and re-counting the whole list on every iteration.
ph_a_a_pct = ((ph_eth == a_a).cumsum() / ph_seen).tolist()
ph_other_pct = ((ph_eth == 'Other').cumsum() / ph_seen).tolist()
ph_nan_pct = ((ph_eth == nan).cumsum() / ph_seen).tolist()

# assign() returns a new frame carrying the extra columns, so nothing is written into a
# frame that was derived from `data` by filtering.
phi_data = phi_data.assign(
    AfricanAmericanPct=ph_a_a_pct,
    OtherPct=ph_other_pct,
    NonePct=ph_nan_pct,
)
phi_data

Unnamed: 0,artist.value,artistLabel.value,sexGenderLabel.value,sexualOrientationLabel.value,ethnicityLabel.value,birthPlaces.value,QNum,DateAdded,AfricanAmericanPct,OtherPct,NonePct
9,http://www.wikidata.org/entity/Q45111,John R. Neill,male,,,Philadelphia,Q45111,2012-11-13,0.000000,0.000000,1.000000
135,http://www.wikidata.org/entity/Q533275,Edward Percy Moran,male,,,Philadelphia,Q533275,2012-12-07,0.000000,0.000000,1.000000
230,http://www.wikidata.org/entity/Q1328705,Paul Garrin,male,,,Philadelphia,Q1328705,2012-12-20,0.000000,0.000000,1.000000
233,http://www.wikidata.org/entity/Q1364808,William Wallace Denslow,male,,,Philadelphia,Q1364808,2012-12-21,0.000000,0.000000,1.000000
234,http://www.wikidata.org/entity/Q1349603,John Ernest,male,,,Philadelphia,Q1349603,2012-12-21,0.000000,0.000000,1.000000
...,...,...,...,...,...,...,...,...,...,...,...
3599,http://www.wikidata.org/entity/Q108421089,Charles Aaron Pridgen,male,,African Americans,Philadelphia,Q108421089,2021-09-05,0.132911,0.006329,0.860759
3600,http://www.wikidata.org/entity/Q108419969,Columbus Knox,,,,Philadelphia,Q108419969,2021-09-05,0.132075,0.006289,0.861635
3601,http://www.wikidata.org/entity/Q108439127,Edwin Lester,male,,,Philadelphia,Q108439127,2021-09-07,0.131250,0.006250,0.862500
3603,http://www.wikidata.org/entity/Q108498023,Anna Clegg Stryke,female,,,Philadelphia,Q108498023,2021-09-11,0.130435,0.006211,0.863354


Graphing the Philly Data

In [None]:
# (column, legend label) for each running-share line, in drawing order.
phi_traces = [
    ('AfricanAmericanPct', "African American Ethnicity Statement"),
    ('OtherPct', "Non-African American Ethnicity Statement"),
    ('NonePct', "No Ethnicity Statement"),
]

fig_phi = go.Figure()
for pct_column, legend_label in phi_traces:
    fig_phi.add_trace(go.Scatter(
        x=phi_data['DateAdded'],
        y=phi_data[pct_column],
        name=legend_label
    ))

# A text-only trace acts as an annotation; it is kept out of the legend.
fig_phi.add_trace(go.Scatter(
    x=['2021-06-01'],
    y=[0.18],
    text="LEADING Project",
    mode="text",
    showlegend=False))

# The y values are fractions; ".1%" renders them as percentages with one decimal.
fig_phi.update_layout(title="Philadelphia Artists",
                      yaxis=dict(tickformat=".1%"))

# Tick marks start at 0.1 and repeat every 0.2.
fig_phi.update_yaxes(title="Percentage of Artists",
                     tick0=0.1,
                     dtick=0.2)

# Outline the region spanning 2021-07-15 to 2021-12-20 and 0.1 to 0.15 on the y axis.
fig_phi.add_shape(type="circle",
                  x0='2021-07-15', x1='2021-12-20',
                  y0=0.1, y1=0.15,
                  line_color='LightSeaGreen', name='LEADING Project Period')

fig_phi.show()

Repeat the Philly Process for New York City

In [None]:
walkin_here = ['New York City']
ny_cols = ['artist.value', 
            'artistLabel.value',
            'sexGenderLabel.value',
            'sexualOrientationLabel.value',
            'ethnicityLabel.value',
            'birthPlaces.value',
            'QNum',
            'DateAdded'
          ]
# Select rows and columns in a single .loc call and take an explicit copy, so ny_data is
# an independent frame that can safely receive new columns later.
# NOTE(review): birthPlaces.value is a ", "-joined string of birthplace labels, so this
# exact match skips artists that list more than one birthplace - confirm that is intended.
ny_data = data.loc[data['birthPlaces.value'].isin(walkin_here), ny_cols].copy()
ny_data

Unnamed: 0,artist.value,artistLabel.value,sexGenderLabel.value,sexualOrientationLabel.value,ethnicityLabel.value,birthPlaces.value,QNum,DateAdded
25,http://www.wikidata.org/entity/Q158255,Lyonel Feininger,male,,,New York City,Q158255,2012-11-27
33,http://www.wikidata.org/entity/Q216298,Nicky Hilton,female,,,New York City,Q216298,2012-11-30
38,http://www.wikidata.org/entity/Q230817,Devon Aoki,female,,,New York City,Q230817,2012-12-01
43,http://www.wikidata.org/entity/Q243078,Alison Knowles,female,,,New York City,Q243078,2012-12-01
60,http://www.wikidata.org/entity/Q271884,Norman Rockwell,male,,,New York City,Q271884,2012-12-02
...,...,...,...,...,...,...,...,...
3647,http://www.wikidata.org/entity/Q109275000,Mariaceleste De Martino,female,,,New York City,Q109275000,2021-10-26
3666,http://www.wikidata.org/entity/Q109690242,Margaret Noel Abell,female,,,New York City,Q109690242,2021-11-23
3679,http://www.wikidata.org/entity/Q109888934,Allen M. Hart,male,,,New York City,Q109888934,2021-12-02
3687,http://www.wikidata.org/entity/Q110111773,Warren F. Scadron,,,,New York City,Q110111773,2021-12-14


In [None]:
# Running share of each ethnicity category among New York City artists, row by row in the
# existing (DateAdded) order.
ny_eth = ny_data['ethnicityLabel.value']

# 1, 2, ..., N: how many New York City artists have been seen once each row is included.
ny_seen = np.arange(1, len(ny_eth) + 1)

# Running count of matching rows divided by rows seen so far; this replaces re-slicing
# and re-counting the whole list on every iteration.
ny_a_a_pct = ((ny_eth == a_a).cumsum() / ny_seen).tolist()
ny_other_pct = ((ny_eth == 'Other').cumsum() / ny_seen).tolist()
ny_nan_pct = ((ny_eth == nan).cumsum() / ny_seen).tolist()

# assign() returns a new frame carrying the extra columns, so nothing is written into a
# frame that was derived from `data` by filtering.
ny_data = ny_data.assign(
    AfricanAmericanPct=ny_a_a_pct,
    OtherPct=ny_other_pct,
    NonePct=ny_nan_pct,
)
ny_data

Unnamed: 0,artist.value,artistLabel.value,sexGenderLabel.value,sexualOrientationLabel.value,ethnicityLabel.value,birthPlaces.value,QNum,DateAdded,AfricanAmericanPct,OtherPct,NonePct
25,http://www.wikidata.org/entity/Q158255,Lyonel Feininger,male,,,New York City,Q158255,2012-11-27,0.000000,0.000000,1.000000
33,http://www.wikidata.org/entity/Q216298,Nicky Hilton,female,,,New York City,Q216298,2012-11-30,0.000000,0.000000,1.000000
38,http://www.wikidata.org/entity/Q230817,Devon Aoki,female,,,New York City,Q230817,2012-12-01,0.000000,0.000000,1.000000
43,http://www.wikidata.org/entity/Q243078,Alison Knowles,female,,,New York City,Q243078,2012-12-01,0.000000,0.000000,1.000000
60,http://www.wikidata.org/entity/Q271884,Norman Rockwell,male,,,New York City,Q271884,2012-12-02,0.000000,0.000000,1.000000
...,...,...,...,...,...,...,...,...,...,...,...
3647,http://www.wikidata.org/entity/Q109275000,Mariaceleste De Martino,female,,,New York City,Q109275000,2021-10-26,0.027907,0.011628,0.960465
3666,http://www.wikidata.org/entity/Q109690242,Margaret Noel Abell,female,,,New York City,Q109690242,2021-11-23,0.027842,0.011601,0.960557
3679,http://www.wikidata.org/entity/Q109888934,Allen M. Hart,male,,,New York City,Q109888934,2021-12-02,0.027778,0.011574,0.960648
3687,http://www.wikidata.org/entity/Q110111773,Warren F. Scadron,,,,New York City,Q110111773,2021-12-14,0.027714,0.011547,0.960739


In [None]:
# (column, legend label) for each running-share line, in drawing order.
ny_traces = [
    ('AfricanAmericanPct', "African American Ethnicity Statement"),
    ('OtherPct', "Non-African American Ethnicity Statement"),
    ('NonePct', "No Ethnicity Statement"),
]

fig_ny = go.Figure()
for pct_column, legend_label in ny_traces:
    fig_ny.add_trace(go.Scatter(
        x=ny_data['DateAdded'],
        y=ny_data[pct_column],
        name=legend_label
    ))

# The y values are fractions; ".1%" renders them as percentages with one decimal.
fig_ny.update_layout(title="New York City Artists",
                     yaxis=dict(tickformat=".1%"))

# Tick marks start at 0.1 and repeat every 0.2.
fig_ny.update_yaxes(title="Percentage of Artists",
                    tick0=0.1,
                    dtick=0.2)

fig_ny.show()

Detroit

In [None]:
vs_everybody = ['Detroit']
det_cols = ['artist.value', 
            'artistLabel.value',
            'sexGenderLabel.value',
            'sexualOrientationLabel.value',
            'ethnicityLabel.value',
            'birthPlaces.value',
            'QNum',
            'DateAdded'
          ]
# Use this cell's own det_cols (the earlier version selected with ny_cols, which only
# worked if the New York City cell had already been run). Rows and columns are selected
# in a single .loc call and copied, so det_data is an independent frame.
# NOTE(review): birthPlaces.value is a ", "-joined string of birthplace labels, so this
# exact match skips artists that list more than one birthplace - confirm that is intended.
det_data = data.loc[data['birthPlaces.value'].isin(vs_everybody), det_cols].copy()
det_data

Unnamed: 0,artist.value,artistLabel.value,sexGenderLabel.value,sexualOrientationLabel.value,ethnicityLabel.value,birthPlaces.value,QNum,DateAdded
27,http://www.wikidata.org/entity/Q178040,Ray Johnson,male,,,Detroit,Q178040,2012-11-28
145,http://www.wikidata.org/entity/Q543294,Mike Kelley,male,,,Detroit,Q543294,2012-12-07
186,http://www.wikidata.org/entity/Q922536,Sarah Jackson,female,,,Detroit,Q922536,2012-12-14
188,http://www.wikidata.org/entity/Q882861,James Lee Byars,male,,,Detroit,Q882861,2012-12-14
253,http://www.wikidata.org/entity/Q1444685,John Nelson,male,,,Detroit,Q1444685,2012-12-22
...,...,...,...,...,...,...,...,...
3414,http://www.wikidata.org/entity/Q107253769,Gary Metz,male,,,Detroit,Q107253769,2021-06-15
3422,http://www.wikidata.org/entity/Q107253766,G. Ray Kerciu,male,,,Detroit,Q107253766,2021-06-15
3438,http://www.wikidata.org/entity/Q107399564,Ken Aptekar,male,,,Detroit,Q107399564,2021-07-02
3652,http://www.wikidata.org/entity/Q109399033,Ben Blount,male,,,Detroit,Q109399033,2021-11-04


In [None]:
# Running share of each ethnicity category among Detroit artists, row by row in the
# existing (DateAdded) order.
det_eth = det_data['ethnicityLabel.value']

# 1, 2, ..., N: how many Detroit artists have been seen once each row is included.
det_seen = np.arange(1, len(det_eth) + 1)

# Running count of matching rows divided by rows seen so far; this replaces re-slicing
# and re-counting the whole list on every iteration.
det_a_a_pct = ((det_eth == a_a).cumsum() / det_seen).tolist()
det_other_pct = ((det_eth == 'Other').cumsum() / det_seen).tolist()
det_nan_pct = ((det_eth == nan).cumsum() / det_seen).tolist()

# assign() returns a new frame carrying the extra columns, so nothing is written into a
# frame that was derived from `data` by filtering.
det_data = det_data.assign(
    AfricanAmericanPct=det_a_a_pct,
    OtherPct=det_other_pct,
    NonePct=det_nan_pct,
)
det_data

Unnamed: 0,artist.value,artistLabel.value,sexGenderLabel.value,sexualOrientationLabel.value,ethnicityLabel.value,birthPlaces.value,QNum,DateAdded,AfricanAmericanPct,OtherPct,NonePct
27,http://www.wikidata.org/entity/Q178040,Ray Johnson,male,,,Detroit,Q178040,2012-11-28,0.000000,0.000000,1.000000
145,http://www.wikidata.org/entity/Q543294,Mike Kelley,male,,,Detroit,Q543294,2012-12-07,0.000000,0.000000,1.000000
186,http://www.wikidata.org/entity/Q922536,Sarah Jackson,female,,,Detroit,Q922536,2012-12-14,0.000000,0.000000,1.000000
188,http://www.wikidata.org/entity/Q882861,James Lee Byars,male,,,Detroit,Q882861,2012-12-14,0.000000,0.000000,1.000000
253,http://www.wikidata.org/entity/Q1444685,John Nelson,male,,,Detroit,Q1444685,2012-12-22,0.000000,0.000000,1.000000
...,...,...,...,...,...,...,...,...,...,...,...
3414,http://www.wikidata.org/entity/Q107253769,Gary Metz,male,,,Detroit,Q107253769,2021-06-15,0.049180,0.081967,0.868852
3422,http://www.wikidata.org/entity/Q107253766,G. Ray Kerciu,male,,,Detroit,Q107253766,2021-06-15,0.048387,0.080645,0.870968
3438,http://www.wikidata.org/entity/Q107399564,Ken Aptekar,male,,,Detroit,Q107399564,2021-07-02,0.047619,0.079365,0.873016
3652,http://www.wikidata.org/entity/Q109399033,Ben Blount,male,,,Detroit,Q109399033,2021-11-04,0.046875,0.078125,0.875000


In [None]:
# (column, legend label) for each running-share line, in drawing order.
det_traces = [
    ('AfricanAmericanPct', "African American Ethnicity Statement"),
    ('OtherPct', "Non-African American Ethnicity Statement"),
    ('NonePct', "No Ethnicity Statement"),
]

fig_det = go.Figure()
for pct_column, legend_label in det_traces:
    fig_det.add_trace(go.Scatter(
        x=det_data['DateAdded'],
        y=det_data[pct_column],
        name=legend_label
    ))

# The y values are fractions; ".1%" renders them as percentages with one decimal.
fig_det.update_layout(title="Detroit Artists",
                      yaxis=dict(tickformat=".1%"))

# Tick marks start at 0.1 and repeat every 0.2.
fig_det.update_yaxes(title="Percentage of Artists",
                     tick0=0.1,
                     dtick=0.2)

fig_det.show()

Chicago

In [None]:
melort = ['Chicago']
chi_cols = ['artist.value', 
            'artistLabel.value',
            'sexGenderLabel.value',
            'sexualOrientationLabel.value',
            'ethnicityLabel.value',
            'birthPlaces.value',
            'QNum',
            'DateAdded'
          ]
# Select rows and columns in a single .loc call and take an explicit copy, so chi_data is
# an independent frame that can safely receive new columns later.
# NOTE(review): birthPlaces.value is a ", "-joined string of birthplace labels, so this
# exact match skips artists that list more than one birthplace - confirm that is intended.
chi_data = data.loc[data['birthPlaces.value'].isin(melort), chi_cols].copy()
chi_data

Unnamed: 0,artist.value,artistLabel.value,sexGenderLabel.value,sexualOrientationLabel.value,ethnicityLabel.value,birthPlaces.value,QNum,DateAdded
29,http://www.wikidata.org/entity/Q202621,Larry Bell,male,,,Chicago,Q202621,2012-11-29
31,http://www.wikidata.org/entity/Q192410,Jennifer Hudson,female,,African Americans,Chicago,Q192410,2012-11-29
46,http://www.wikidata.org/entity/Q268542,Wendy Schaal,female,,,Chicago,Q268542,2012-12-01
53,http://www.wikidata.org/entity/Q234928,Alice Bradley Sheldon,female,bisexuality,,Chicago,Q234928,2012-12-01
102,http://www.wikidata.org/entity/Q442214,Cynthia Plaster Caster,female,,,Chicago,Q442214,2012-12-06
...,...,...,...,...,...,...,...,...
3607,http://www.wikidata.org/entity/Q108570562,Gayle Tanaka,female,,,Chicago,Q108570562,2021-09-17
3611,http://www.wikidata.org/entity/Q108678763,Rose Crosman,female,,,Chicago,Q108678763,2021-09-24
3632,http://www.wikidata.org/entity/Q108758789,Alva Mooses,,,,Chicago,Q108758789,2021-10-01
3646,http://www.wikidata.org/entity/Q109230278,Galia Zamir,female,,,Chicago,Q109230278,2021-10-25


In [None]:
# Running share of each ethnicity category among Chicago artists, row by row in the
# existing (DateAdded) order.
chi_eth = chi_data['ethnicityLabel.value']

# 1, 2, ..., N: how many Chicago artists have been seen once each row is included.
chi_seen = np.arange(1, len(chi_eth) + 1)

# Running count of matching rows divided by rows seen so far; this replaces re-slicing
# and re-counting the whole list on every iteration.
chi_a_a_pct = ((chi_eth == a_a).cumsum() / chi_seen).tolist()
chi_other_pct = ((chi_eth == 'Other').cumsum() / chi_seen).tolist()
chi_nan_pct = ((chi_eth == nan).cumsum() / chi_seen).tolist()

# assign() returns a new frame carrying the extra columns, so nothing is written into a
# frame that was derived from `data` by filtering.
chi_data = chi_data.assign(
    AfricanAmericanPct=chi_a_a_pct,
    OtherPct=chi_other_pct,
    NonePct=chi_nan_pct,
)
chi_data

Unnamed: 0,artist.value,artistLabel.value,sexGenderLabel.value,sexualOrientationLabel.value,ethnicityLabel.value,birthPlaces.value,QNum,DateAdded,AfricanAmericanPct,OtherPct,NonePct
29,http://www.wikidata.org/entity/Q202621,Larry Bell,male,,,Chicago,Q202621,2012-11-29,0.000000,0.000000,1.000000
31,http://www.wikidata.org/entity/Q192410,Jennifer Hudson,female,,African Americans,Chicago,Q192410,2012-11-29,0.500000,0.000000,0.500000
46,http://www.wikidata.org/entity/Q268542,Wendy Schaal,female,,,Chicago,Q268542,2012-12-01,0.333333,0.000000,0.666667
53,http://www.wikidata.org/entity/Q234928,Alice Bradley Sheldon,female,bisexuality,,Chicago,Q234928,2012-12-01,0.250000,0.000000,0.750000
102,http://www.wikidata.org/entity/Q442214,Cynthia Plaster Caster,female,,,Chicago,Q442214,2012-12-06,0.200000,0.000000,0.800000
...,...,...,...,...,...,...,...,...,...,...,...
3607,http://www.wikidata.org/entity/Q108570562,Gayle Tanaka,female,,,Chicago,Q108570562,2021-09-17,0.048485,0.018182,0.933333
3611,http://www.wikidata.org/entity/Q108678763,Rose Crosman,female,,,Chicago,Q108678763,2021-09-24,0.048193,0.018072,0.933735
3632,http://www.wikidata.org/entity/Q108758789,Alva Mooses,,,,Chicago,Q108758789,2021-10-01,0.047904,0.017964,0.934132
3646,http://www.wikidata.org/entity/Q109230278,Galia Zamir,female,,,Chicago,Q109230278,2021-10-25,0.047619,0.017857,0.934524


In [None]:
# (column, legend label) for each running-share line, in drawing order.
chi_traces = [
    ('AfricanAmericanPct', "African American Ethnicity Statement"),
    ('OtherPct', "Non-African American Ethnicity Statement"),
    ('NonePct', "No Ethnicity Statement"),
]

fig_chi = go.Figure()
for pct_column, legend_label in chi_traces:
    fig_chi.add_trace(go.Scatter(
        x=chi_data['DateAdded'],
        y=chi_data[pct_column],
        name=legend_label
    ))

# The y values are fractions; ".1%" renders them as percentages with one decimal.
fig_chi.update_layout(title="Chicago Artists",
                      yaxis=dict(tickformat=".1%"))

# Tick marks start at 0.1 and repeat every 0.2.
fig_chi.update_yaxes(title="Percentage of Artists",
                     tick0=0.1,
                     dtick=0.2)

fig_chi.show()