<img src="Maslow's_Hierarchy_of_Needs.svg.png" height = 600px>

In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import urllib.parse # combine URL components into URL string
import wikipediaapi # query wikipedia through api

from statsmodels.tsa.seasonal import STL # seasonal decompositions
import statsmodels.tsa.stattools as smt


import pickle #  to serialize and deserialize objects in Python
import requests
from scipy import signal
import warnings
from urllib import request
from bs4 import BeautifulSoup
import json
import pytrends
from scipy import stats
import numpy as np
import matplotlib.pyplot as plt
import wikipediaapi
import pprint as pp

In [2]:
# 
def create_dataframe(name_file):
    """
    Creates a dataframe from a text file
    param: name_file: name of the text file
    return: dataframe with the text file
    """
    df = pd.read_csv(name_file, delimiter="\t", header=None, names=['Topics'])
    df.drop_duplicates(inplace=True)
    return df

# Take only starting from the second word in each row
def remove_space(df):
    return df['Topics'].apply(lambda x: x.strip().replace(' ', '_'))

# Parse the topics into the URL format
def parse_topics_into_df(df, start_time, end_time):
    # change the spaces to underscores
    df['url'] = np.zeros(len(df))
    for index, row in df.iterrows():
        topic_value = row['Topics']
        df.loc[index, 'url'] = f'https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia.org/all-access/all-agents/{topic_value}/monthly/{start_time}/{end_time}'

    return df

# Create a new dataframe with timestamp from starting date to ending date
def create_dataframe_timestamp(starting_date, ending_date):
    df_timestamp = pd.DataFrame()
    df_timestamp['Timestamp'] = pd.date_range(start=starting_date, end=ending_date, freq='MS')
    return df_timestamp

# Define a function to fetch data from the URL and handle errors
def fetch_and_parse_url(url):
    try:
        request.urlopen(url).read()
        return True
    except request.HTTPError as e:
        if e.code == 404:
            return None  # or any other value or action you prefer for 404 errors
        else:
            return None  # or handle other HTTP errors as needed
    except Exception as e:
        
        return None  # or handle other exceptions as needed

def get_pageviews_wiki(url):
    """
    Gets the weekly pageviews for one Wikipedia page in one language in the desired period
    param: url: url of the Wikipedia page
    param: start_date: beginning of the desired period 
    param: end_date: end of the desired period 
    return: dataframe column with the monthly pageviews
    """
    html = request.urlopen(url).read()
    soup = BeautifulSoup(html,'html.parser')
    site_json=json.loads(soup.text)
    df=pd.DataFrame(site_json['items'])
    df=df['views']
    return df

def scrape_pageviews(df):
    pageview = pd.DataFrame()
    pageview['Timestamp'] = create_dataframe_timestamp('2019-01-01', '2020-07-31')['Timestamp']

    # Loop through the rows of the DataFrame and append the results of the function to the DataFrame
    for index, row in df.iterrows():
        url = row['url']
        if fetch_and_parse_url(url):
            pageview[row['Topics']] = get_pageviews_wiki(url)

    return pageview

# Specify the path to your text file
text_file_path = 'creativity.txt'

def create_dataframe_pageviews(name_file, start_time = '20190101', end_time = '20200731'):
    """
    Creates a dataframe from a text file
    param: name_file: name of the text file, start_time: beginning of the desired period, end_time: end of the desired period
    """
    df_topic = create_dataframe(name_file)
    df_topic['Topics'] = remove_space(df_topic)
    df_topic = parse_topics_into_df(df_topic, start_time, end_time)
    df_pageviews = scrape_pageviews(df_topic)
    df_pageviews.fillna(0, inplace=True)
    return df_pageviews, df_topic

# Read the text file into a DataFrame
df_pageviews, df_topic = create_dataframe_pageviews(text_file_path)
print(type(df_pageviews))
print(df_pageviews.shape)
df_pageviews

<class 'pandas.core.frame.DataFrame'>
(19, 63)


Unnamed: 0,Timestamp,Innovation,Imagination,Design_Thinking,Creative_Writing,Collaboration,Problem_Solving,Originality,Curiosity,Visual_Arts,...,Gamification,Neuroplasticity,Universal_Design,Eco-art,Creative_Economy,Biophilic_Design,Neuroergonomics,Emotional_Marketing,Ethical_Marketing,Cultural_Diplomacy
0,2019-01-01,42306,14371,867,7655,16515,974,3921,7651,1191,...,20987,41780,173,48,22,241,457,66,30,192
1,2019-02-01,42934,11614,487,5533,15903,730,6288,7436,970,...,19327,39362,197,39,24,195,303,52,21,140
2,2019-03-01,45950,12652,704,3970,18002,859,4647,7498,1026,...,24045,38520,244,58,25,242,461,70,26,187
3,2019-04-01,41533,11875,750,3413,16438,858,4062,6669,1146,...,21869,38628,243,33,25,202,450,73,22,155
4,2019-05-01,43252,13386,868,3080,16961,1454,4182,7397,1162,...,20032,35195,745,35,20,171,389,51,8,175
5,2019-06-01,37381,12328,637,3194,13962,1389,3041,7935,1143,...,18605,31261,1092,30,19,209,352,29,10,120
6,2019-07-01,38019,12433,626,2012,14482,855,3157,8030,1012,...,19648,30681,674,35,36,201,304,15,11,187
7,2019-08-01,40097,12763,593,1766,14883,926,3344,8005,1014,...,20023,32695,616,44,24,537,287,29,7,294
8,2019-09-01,46346,13006,624,2091,16746,458,3401,8992,1001,...,21446,39244,367,43,23,192,355,32,8,167
9,2019-10-01,50705,14896,583,2707,20334,720,4357,9823,859,...,21704,40062,346,38,24,251,366,29,12,158


In [3]:
def get_label_in_language(english_label, target_language):
    # Endpoint URL for the Wikidata Query Service
    url = "https://query.wikidata.org/sparql"
    
    # SPARQL query to get the item with the English label and its label in the target language
    query = f'''
    SELECT ?item ?itemLabel WHERE {{
      ?item rdfs:label "{english_label}"@en.
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],{target_language}". }}
    }}
    LIMIT 1
    '''
    
    # Headers for the request
    headers = {
        'User-Agent': 'MyBot/0.1 (myemail@example.com)',
        'Accept': 'application/sparql-results+json'
    }
    
    # Make the GET request
    response = requests.get(url, headers=headers, params={'query': query, 'format': 'json'})
    
    # Check if the request was successful
    if response.status_code == 200:
        data = response.json()
        results = data['results']['bindings']
        if results:
            # Return the item and its label in the target language
            return results[0]['itemLabel']['value']
        else:
            return '_' # No label found for this language
    else:
        # Handle unsuccessful requests
        response.raise_for_status()

def change_Q(name):
    if name.startswith('Q'):
        return ' '
    else:
        return name


# Run through all the topics and get the translation in Italien and store it in a new column in the DataFrame
df_topic['Topics'] = df_topic['Topics'].str.lower()
df_topic['Italian'] = df_topic['Topics'].apply(lambda x: get_label_in_language(x, 'it'))
df_topic['Italian'].replace('No label found for this language', ' ', inplace=True)
df_topic['Italian'] = df_topic['Italian'].apply(lambda x: change_Q(x))

# Count the number of topics that have a translation in Spanish
print(df_topic['Italian'].value_counts())
df_topic

Italian
_                       318
                          3
innovazione               1
improvvisazione           1
gamificazione             1
etnobotanica              1
Co-creazione              1
sintesi                   1
avanguardia               1
storytelling              1
scultura                  1
immaginazione             1
imprenditorialità         1
curiosità                 1
originalità               1
collaborazione            1
plasticità cerebrale      1
Name: count, dtype: int64


Unnamed: 0,Topics,url,Italian
0,innovation,https://wikimedia.org/api/rest_v1/metrics/page...,innovazione
1,imagination,https://wikimedia.org/api/rest_v1/metrics/page...,immaginazione
2,artistic_expression,https://wikimedia.org/api/rest_v1/metrics/page...,_
3,design_thinking,https://wikimedia.org/api/rest_v1/metrics/page...,_
4,creative_writing,https://wikimedia.org/api/rest_v1/metrics/page...,_
...,...,...,...
336,theatricality_in_storytelling,https://wikimedia.org/api/rest_v1/metrics/page...,_
338,designing_for_inclusive_entrepreneurship,https://wikimedia.org/api/rest_v1/metrics/page...,_
339,the_future_of_creativity_in_biodesign,https://wikimedia.org/api/rest_v1/metrics/page...,_
341,design_for_inclusive_technology,https://wikimedia.org/api/rest_v1/metrics/page...,_
