<a href="https://colab.research.google.com/github/dhan16/colabs/blob/master/covid19opendata/WikiData.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Sparql functions
import requests
import pandas as pd

# URL that wiki_data() sends its SPARQL queries to (the query.wikidata.org SPARQL endpoint).
ENDPOINT = "https://query.wikidata.org/sparql"


def wiki_data(sparql, timeout=120):
  """Run a SPARQL query against ENDPOINT and return the decoded JSON response.

  Args:
    sparql: SPARQL query text; sent as the `query` URL parameter.
    timeout: Seconds `requests` waits on the server before giving up.
      Without a timeout a stalled connection would block the notebook forever.

  Returns:
    The response body parsed from JSON.

  Raises:
    requests.HTTPError: if the endpoint answers with a 4xx/5xx status.
    requests.Timeout: if the server does not respond within `timeout`.
  """
  res = requests.get(
      ENDPOINT,
      params={'format': 'json', 'query': sparql},
      timeout=timeout,
  )
  # Surface HTTP failures (bad query, throttling, server error) directly
  # instead of as an opaque JSON decode error on a non-JSON error body.
  res.raise_for_status()
  return res.json()


def wikidata_to_dataframe(json):
  """Convert a SPARQL JSON result document into a pandas DataFrame.

  Args:
    json: Decoded response with the bindings under
      `json["results"]["bindings"]`; each binding maps a variable name to a
      dict holding that variable's `"value"`.

  Returns:
    A DataFrame with one row per binding and one column per variable.
    Column order follows the first binding, followed by any variable that
    first shows up in a later binding. A variable missing from a binding
    yields None in that row. For an empty result the columns are taken from
    `json["head"]["vars"]` when present.
  """
  results = json["results"]["bindings"]
  if not results:
    # No rows to inspect: fall back to the declared variable list, if any,
    # so callers still get a frame with the expected columns.
    return pd.DataFrame(columns=list(json.get("head", {}).get("vars", [])))

  # Column order starts from the first result, then grows with any variable
  # seen only in later results.
  cols = list(results[0])
  known = set(cols)
  for result in results:
    for name in result:
      if name not in known:
        known.add(name)
        cols.append(name)

  # Look values up by column name rather than by each binding's own key
  # order, so every value lands under its own variable.
  rows = [
      [result[name]["value"] if name in result else None for name in cols]
      for result in results
  ]
  return pd.DataFrame(rows, columns=cols)


In [2]:
# Query for every ?place whose P31 value is wd:Q12479774 or reaches it through
# a chain of P279 links, returning one row per (?place, ?class) pair where
# ?class is a direct P31 value of the place. A place with several P31 values
# therefore appears on several rows (see Surakarta in the output below).
# The label service supplies the English ?placeLabel / ?classLabel columns.
# NOTE(review): P31/P279 are presumably Wikidata's "instance of" / "subclass of",
# and Q12479774 presumably the class covering Indonesian cities and regencies
# (judging by the returned class labels) — confirm on wikidata.org.
sparql = """
SELECT ?place ?placeLabel ?class ?classLabel
WHERE
{
  ?place wdt:P31/wdt:P279* wd:Q12479774.
  ?place wdt:P31 ?class
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en" .
  }
}
"""
# Run the query and flatten the JSON bindings into a DataFrame for display.
res = wiki_data(sparql)
wiki_df = wikidata_to_dataframe(res)
wiki_df

Unnamed: 0,place,class,placeLabel,classLabel
0,http://www.wikidata.org/entity/Q4803,http://www.wikidata.org/entity/Q1549591,Surakarta,big city
1,http://www.wikidata.org/entity/Q4803,http://www.wikidata.org/entity/Q3199141,Surakarta,city of Indonesia
2,http://www.wikidata.org/entity/Q5779,http://www.wikidata.org/entity/Q1549591,Banda Aceh,big city
3,http://www.wikidata.org/entity/Q5779,http://www.wikidata.org/entity/Q3199141,Banda Aceh,city of Indonesia
4,http://www.wikidata.org/entity/Q5781,http://www.wikidata.org/entity/Q1549591,Langsa,big city
...,...,...,...,...
622,http://www.wikidata.org/entity/Q19745487,http://www.wikidata.org/entity/Q3191695,Buton Tengah,regency of Indonesia
623,http://www.wikidata.org/entity/Q19745612,http://www.wikidata.org/entity/Q3191695,Buton Selatan,regency of Indonesia
624,http://www.wikidata.org/entity/Q19746428,http://www.wikidata.org/entity/Q3191695,Muna Barat,regency of Indonesia
625,http://www.wikidata.org/entity/Q26759674,http://www.wikidata.org/entity/Q3191695,Kepulauan Anambas,regency of Indonesia


In [None]:
# Distinct class labels present in the Wikidata results.
wiki_df["classLabel"].unique()

In [3]:
# Pin gspread to the release this notebook was last run against (3.6.0, per the
# recorded install log) so a fresh runtime reproduces the same behavior.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install gspread==3.6.0

Collecting gspread
  Downloading https://files.pythonhosted.org/packages/9c/ba/bc8de4f5077bd34bc873bdd67a89cb29c4f181abba8a836d2c6a0a142365/gspread-3.6.0-py3-none-any.whl
Installing collected packages: gspread
  Found existing installation: gspread 3.0.1
    Uninstalling gspread-3.0.1:
      Successfully uninstalled gspread-3.0.1
Successfully installed gspread-3.6.0


In [None]:
# Load the 'Kode Kota' worksheet of the Google Sheet into a DataFrame.
from google.colab import auth
from oauth2client.client import GoogleCredentials
import gspread
import pandas as pd

# Sign the Colab user in first, then build the gspread client from the
# application-default credentials (same order as the calls were originally made).
auth.authenticate_user()
gc = gspread.authorize(GoogleCredentials.get_application_default())

sheet_url = 'https://docs.google.com/spreadsheets/d/1FJJXiGuOb5nXrjJeV3QcHNhTo38YdcsTIFl29mWDIqI/edit#gid=2006070746'
worksheet = gc.open_by_url(sheet_url).worksheet('Kode Kota')
rows = worksheet.get_all_values()

# rows[1] provides the column names and rows[2:] the records; rows[0] is unused.
sheet_df = pd.DataFrame.from_records(data=rows[2:], columns=rows[1])
sheet_df

In [None]:
# Left-join the sheet onto the Wikidata places by name ('Kota' against
# 'placeLabel'); sheet rows without a match come back with a null placeLabel.
df = sheet_df.merge(wiki_df, left_on='Kota', right_on='placeLabel', how='left')
# Optional stricter filter, kept for reference (also drops the 'zTam' rows):
#   df[df['placeLabel'].isnull() & ~df['KabKota'].isin(['zTam'])]
missing = df.loc[df['placeLabel'].isna()]
missing

Unnamed: 0,ID Provinsi,Provinsi,ID Kota,KabKota,Kota,place,class,placeLabel,classLabel
26,1,Aceh (NAD),1011,zTam,Luar Provinsi Aceh,,,,
27,1,Aceh (NAD),1013,zTam,Luar Negeri (Aceh),,,,
28,1,Aceh (NAD),1012,zTam,Belum Diverifikasi (Aceh),,,,
29,2,Bali,1021,zTam,WNA (Bali),,,,
30,2,Bali,1022,zTam,Luar Provinsi Bali,,,,
...,...,...,...,...,...,...,...,...,...
704,34,Sumatera Utara,472,Kab.,Labuhanbatu,,,,
721,34,Sumatera Utara,486,Kota,Pematang Siantar,,,,
726,34,Sumatera Utara,491,Kota,Tanjung Balai,,,,
728,34,Sumatera Utara,493,Kab.,Tapanuli Tengah,,,,
