# User Behavior Analysis

In [None]:
import pandas as pd
import json
from tqdm import tqdm
from SPARQLWrapper import SPARQLWrapper, JSON
import time

MIND_type = 'MINDsmall'

data_path_base="/app/SUBERX/datasets/"
data_path = data_path_base + MIND_type +"/"


behaviors_file = data_path + "train/behaviors.tsv"
print(f"Behaviors File {behaviors_file}")

news_file = data_path + "train/news.tsv"
news_df = pd.read_csv(news_file, sep="\t", names=["news_id", "category", "subcategory", "title", "abstract", "url", "title_entities", "abstract_entities"])
print(f"News file {news_file}")
# Load the behaviors data
columns = ["impression_id", "user_id", "time", "history", "impressions"]
behaviors_df = pd.read_csv(behaviors_file, sep="\t", names=columns)

# Display basic statistics and data sample
#print(behaviors_df.info())
#print(behaviors_df.head())

## Statistics

Calculate some statistics

**Number of unique users**: There are 50,000 unique users in the *small* MIND dataset.
**NUmber of sessions**: A session represents a single instance where a user interacts with the platform, such as browsing news or clicking on articles.
**Average Session length**: A longer history provides more context for the recommendation system but can also increase computational complexity.
**Average Impressions per session**: Impressions are the total number of recommendations or articles displayed to a user.


In [None]:
num_users = behaviors_df["user_id"].nunique()
print(f"Number of unique users: {num_users}")


num_sessions = len(behaviors_df)
print(f"Number of sessions: {num_sessions}")

behaviors_df["history_length"] = behaviors_df["history"].fillna("").apply(lambda x: len(x.split(" ")))
avg_history_length = behaviors_df["history_length"].mean()
print(f"Average session length (history length): {avg_history_length:.2f}")

behaviors_df["num_impressions"] = behaviors_df["impressions"].apply(lambda x: len(x.split(" ")))
avg_impressions = behaviors_df["num_impressions"].mean()
print(f"Average impressions per session: {avg_impressions:.2f}")




def calculate_ctr(impressions):
    clicks = sum([1 for impression in impressions.split(" ") if impression.endswith("-1")])
    total = len(impressions.split(" "))
    return clicks / total if total > 0 else 0

behaviors_df["ctr"] = behaviors_df["impressions"].apply(calculate_ctr)
avg_ctr = behaviors_df["ctr"].mean()
print(f"Average CTR: {avg_ctr:.2%}")



## Group Users into catagories based on engagement

In [None]:
def user_engagement_group(ctr):
    if ctr > 0.75:
        return "Highly Engaged"
    elif ctr > 0.25:
        return "Moderately Engaged"
    else:
        return "Low Engagement"

user_ctr = behaviors_df.groupby("user_id")["ctr"].mean()
engagement_groups = user_ctr.apply(user_engagement_group)

engagement_summary = engagement_groups.value_counts()
print("User Engagement Summary:")
print(engagement_summary)


## Based on time



In [None]:
behaviors_df["time"] = pd.to_datetime(behaviors_df["time"])
behaviors_df["hour"] = behaviors_df["time"].dt.hour

hourly_behavior = behaviors_df.groupby("hour")["ctr"].mean()
hourly_behavior.plot(kind="bar", title="CTR by Hour of Day", xlabel="Hour", ylabel="CTR")


## Visualize the results


In [None]:
import matplotlib.pyplot as plt

plt.hist(behaviors_df["ctr"], bins=50, color="blue", alpha=0.7)
plt.title("Distribution of CTR")
plt.xlabel("CTR")
plt.ylabel("Frequency")
plt.show()


In [None]:
import matplotlib.pyplot as plt

# User engagement data
labels = ['Low Engagement', 'Moderately Engaged']
counts = [44627, 5373]
colors = ['lightcoral', 'skyblue']

# Pie chart
plt.figure(figsize=(8, 8))
plt.pie(counts, labels=labels, autopct='%1.1f%%', startangle=90, colors=colors, wedgeprops={'edgecolor': 'black'})
plt.title('User Engagement Summary')
plt.show()


In [None]:
## Distribution of session history lengths


In [None]:
import matplotlib.pyplot as plt

# Distribution of session history lengths
plt.figure(figsize=(10, 6))
plt.hist(behaviors_df['history_length'], bins=50, color='skyblue', edgecolor='black', alpha=0.7)
plt.title('Distribution of Session History Lengths')
plt.xlabel('History Length (Number of Articles)')
plt.ylabel('Frequency')
plt.show()


In [None]:
# Summary statistics for session history length
history_stats = behaviors_df['history_length'].describe()
print(history_stats)


In [None]:
# Categorize sessions based on history length
def categorize_history_length(length):
    if length < 10:
        return 'Short'
    elif length < 50:
        return 'Medium'
    else:
        return 'Long'

behaviors_df['history_category'] = behaviors_df['history_length'].apply(categorize_history_length)

# Count sessions in each category
history_categories = behaviors_df['history_category'].value_counts()
print(history_categories)


In [None]:
# Average CTR by history category
ctr_by_history_category = behaviors_df.groupby('history_category')['ctr'].mean()
print(ctr_by_history_category)

# Bar chart for visualization
ctr_by_history_category.plot(kind='bar', color='coral', edgecolor='black', title='CTR by Session History Category')
plt.xlabel('Session History Category')
plt.ylabel('Average CTR')
plt.show()


In [None]:
# Average history length by hour of the day
hourly_history = behaviors_df.groupby('hour')['history_length'].mean()
hourly_history.plot(kind='line', title='Average Session History Length by Hour', xlabel='Hour', ylabel='Average History Length')
plt.show()


In [None]:
# Simulated data for 'history_category' counts
history_category_counts = {
    'Medium': 78667,
    'Short': 46784,
    'Long': 31514
}

# Pie chart for session history categories
plt.figure(figsize=(8, 8))
plt.pie(
    history_category_counts.values(),
    labels=history_category_counts.keys(),
    autopct='%1.1f%%',
    startangle=90,
    colors=['skyblue', 'lightcoral', 'lightgreen'],
    wedgeprops={'edgecolor': 'black'}
)
plt.title('Distribution of Session History Categories')
plt.show()


---

## Summary so Far

With the small MIND dataset we can see that most of the users are not engaged at all. 

There are 50,000 users and 156965 sesions. The majority of users, approximately 89% have low engagement suggesting either the recommendations are not aligning well with user interests or users are inherently less interactive and ignore recommendations, consumming news randomly.

## The Flip side

The flip side of this is that the user is drawn to recommendations because they are an expert in, say for example, *the Transylvania space program.* The recommendations are vital to focusing their attention on a specific area of analysis or research. 

## What would an analysts dataset look like

Consider a dataset of behaviors based on analysts who need to view the most `relevent news` first.  Not the daily news but the daily news about a specific topic that they maintain expertise on.  What does that look like?  How can we create that?



---

## First extract the entities/topics from the dataset

In [None]:

# Function to extract entity IDs
def extract_entity_ids(entities_str):
    if pd.isna(entities_str):
        return []
    entities = json.loads(entities_str)
    return [entity.get('WikidataId') for entity in entities if 'WikidataId' in entity]

# Extract entities from title and abstract as lists
news_df['title_entity_ids'] = news_df['title_entities'].apply(extract_entity_ids)
news_df['abstract_entity_ids'] = news_df['abstract_entities'].apply(extract_entity_ids)

# Combine all entity IDs into one column
news_df['all_entity_ids'] = news_df['title_entity_ids'] + news_df['abstract_entity_ids']

# Flatten and get unique entity IDs
unique_entity_ids = set(entity for entities in news_df['all_entity_ids'] for entity in entities)
print(f"Unique entity IDs: {len(unique_entity_ids)}")


In [None]:
import requests

def fetch_entity_name_old(entity_id):
    url = f"https://www.wikidata.org/wiki/Special:EntityData/{entity_id}.json"
    try:
        response = requests.get(url)
        if response.status_code == 200:
            data = response.json()
            return data['entities'][entity_id]['labels']['en']['value']
    except Exception as e:
        print(f"Error fetching {entity_id}: {e}")
    return "Unknown"

# Map IDs to names
#news_df['all_entity_names'] = news_df['all_entity_ids'].apply(lambda ids: [fetch_entity_name(eid) for eid in ids])


In [None]:

# Function to query Wikidata for entity names
def fetch_entity_name(entity_id):
    sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
    query = f"""
    SELECT ?entity ?label WHERE {{
      VALUES ?entity {{ wd:{entity_id} }}
      ?entity rdfs:label ?label .
      FILTER(LANG(?label) = "en")
    }}
    """
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    try:
        results = sparql.query().convert()
        for result in results['results']['bindings']:
            return result['label']['value']
    except Exception as e:
        print(f"Error fetching {entity_id}: {e}")
        return "Unknown"


In [17]:
# Create the mapping DataFrame
entity_mapping = []
for entity_id in tqdm(unique_entity_ids, desc="Fetching entity names", unit="entity"):
    name = fetch_entity_name(entity_id)
    entity_mapping.append({"entity_id": entity_id, "entity_name": name})
    time.sleep(0.2)  # Avoid overwhelming the API

entity_mapping_df = pd.DataFrame(entity_mapping)

# Save the mapping
#entity_mapping_df.to_csv("entity_mapping.csv", index=False)

Fetching entity names:  33% 9093/27759 [56:54<1:40:48,  3.09entity/s]

Error fetching Q6180353: HTTP Error 429: Too Many Requests


Fetching entity names:  33% 9102/27759 [56:57<1:43:10,  3.01entity/s]

Error fetching Q4859813: HTTP Error 429: Too Many Requests


Fetching entity names:  33% 9105/27759 [56:58<1:41:40,  3.06entity/s]

Error fetching Q1079: HTTP Error 429: Too Many Requests


Fetching entity names:  33% 9109/27759 [57:00<1:45:26,  2.95entity/s]

Error fetching Q6444131: HTTP Error 429: Too Many Requests


Fetching entity names:  33% 9114/27759 [57:15<11:40:20,  2.25s/entity]

Error fetching Q7605242: HTTP Error 429: Too Many Requests


Fetching entity names:  33% 9123/27759 [57:18<2:08:01,  2.43entity/s] 

Error fetching Q852598: HTTP Error 429: Too Many Requests


Fetching entity names:  33% 9133/27759 [57:21<1:34:20,  3.29entity/s]

Error fetching Q48282: HTTP Error 429: Too Many Requests


Fetching entity names:  33% 9140/27759 [57:24<1:41:46,  3.05entity/s]

Error fetching Q3065651: HTTP Error 429: Too Many Requests


Fetching entity names:  33% 9151/27759 [57:28<1:49:02,  2.84entity/s]

Error fetching Q440285: HTTP Error 429: Too Many Requests


Fetching entity names:  33% 9155/27759 [57:29<1:43:18,  3.00entity/s]

Error fetching Q578289: HTTP Error 429: Too Many Requests


Fetching entity names:  33% 9168/27759 [57:33<1:42:45,  3.02entity/s]

Error fetching Q188460: HTTP Error 429: Too Many Requests


Fetching entity names:  33% 9177/27759 [57:36<1:44:57,  2.95entity/s]

Error fetching Q537463: HTTP Error 429: Too Many Requests
Error fetching Q69354: HTTP Error 429: Too Many Requests


Fetching entity names:  33% 9186/27759 [57:39<1:41:36,  3.05entity/s]

Error fetching Q778: HTTP Error 429: Too Many Requests


Fetching entity names:  33% 9189/27759 [57:40<1:46:49,  2.90entity/s]

Error fetching Q15094181: HTTP Error 429: Too Many Requests


Fetching entity names:  33% 9197/27759 [57:43<1:46:07,  2.92entity/s]

Error fetching Q988744: HTTP Error 429: Too Many Requests


Fetching entity names:  33% 9199/27759 [57:44<1:43:42,  2.98entity/s]

Error fetching Q108131: HTTP Error 429: Too Many Requests


Fetching entity names:  33% 9214/27759 [57:49<1:42:49,  3.01entity/s]

Error fetching Q184827: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 10978/27759 [1:08:21<1:32:43,  3.02entity/s]

Error fetching Q6271599: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 10984/27759 [1:08:23<1:32:16,  3.03entity/s]

Error fetching Q3944383: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 10989/27759 [1:08:25<1:34:41,  2.95entity/s]

Error fetching Q844123: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 11004/27759 [1:08:30<1:35:44,  2.92entity/s]

Error fetching Q55106223: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 11009/27759 [1:08:31<1:33:47,  2.98entity/s]

Error fetching Q188116: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 11011/27759 [1:08:32<1:32:04,  3.03entity/s]

Error fetching Q434890: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 11021/27759 [1:08:36<1:35:18,  2.93entity/s]

Error fetching Q25810: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 11025/27759 [1:08:37<1:36:03,  2.90entity/s]

Error fetching Q473972: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 11038/27759 [1:08:41<1:32:33,  3.01entity/s]

Error fetching Q1148974: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 11054/27759 [1:08:47<1:33:26,  2.98entity/s]

Error fetching Q8023558: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 11056/27759 [1:08:48<1:31:35,  3.04entity/s]

Error fetching Q50591847: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 11059/27759 [1:08:49<1:31:41,  3.04entity/s]

Error fetching Q7320945: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 11061/27759 [1:08:49<1:31:39,  3.04entity/s]

Error fetching Q21708200: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 11063/27759 [1:08:50<1:32:20,  3.01entity/s]

Error fetching Q28101955: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 11065/27759 [1:08:51<1:53:01,  2.46entity/s]

Error fetching Q490813: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 11068/27759 [1:08:52<1:41:42,  2.74entity/s]

Error fetching Q9653: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 11070/27759 [1:08:52<1:36:51,  2.87entity/s]

Error fetching Q719643: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 11074/27759 [1:08:54<2:14:38,  2.07entity/s]

Error fetching Q6549316: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 11081/27759 [1:08:57<1:37:50,  2.84entity/s]

Error fetching Q3073413: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 11087/27759 [1:08:59<1:34:24,  2.94entity/s]

Error fetching Q592524: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 11089/27759 [1:08:59<1:33:14,  2.98entity/s]

Error fetching Q3229192: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 11101/27759 [1:09:03<1:35:02,  2.92entity/s]

Error fetching Q544894: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 11105/27759 [1:09:05<1:32:41,  2.99entity/s]

Error fetching Q4999220: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 11111/27759 [1:09:07<1:33:00,  2.98entity/s]

Error fetching Q4896059: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 11114/27759 [1:09:08<1:32:27,  3.00entity/s]

Error fetching Q59223278: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 11119/27759 [1:09:10<1:35:39,  2.90entity/s]

Error fetching Q11224: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 11124/27759 [1:09:11<1:35:58,  2.89entity/s]

Error fetching Q16194015: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 11125/27759 [1:09:12<1:33:04,  2.98entity/s]

Error fetching Q265852: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 11132/27759 [1:09:14<1:32:11,  3.01entity/s]

Error fetching Q60497: HTTP Error 429: Too Many Requests


Fetching entity names:  40% 11136/27759 [1:09:15<1:30:51,  3.05entity/s]

Error fetching Q1185782: HTTP Error 429: Too Many Requests


Fetching entity names:  44% 12321/27759 [1:16:29<1:30:34,  2.84entity/s]

Error fetching Q15216588: HTTP Error 429: Too Many Requests


Fetching entity names:  44% 12322/27759 [1:16:30<1:28:01,  2.92entity/s]

Error fetching Q981479: HTTP Error 429: Too Many Requests


Fetching entity names:  44% 12323/27759 [1:16:30<1:26:10,  2.99entity/s]

Error fetching Q671510: HTTP Error 429: Too Many Requests


Fetching entity names:  44% 12328/27759 [1:16:32<1:26:41,  2.97entity/s]

Error fetching Q7689805: HTTP Error 429: Too Many Requests


Fetching entity names:  44% 12330/27759 [1:16:32<1:24:46,  3.03entity/s]

Error fetching Q10853379: HTTP Error 429: Too Many Requests


Fetching entity names:  45% 12364/27759 [1:16:45<1:24:58,  3.02entity/s]

Error fetching Q4731284: HTTP Error 429: Too Many Requests


Fetching entity names:  45% 12375/27759 [1:16:48<1:28:51,  2.89entity/s]

Error fetching Q49473179: HTTP Error 429: Too Many Requests


Fetching entity names:  45% 12382/27759 [1:16:51<1:25:47,  2.99entity/s]

Error fetching Q16149057: HTTP Error 429: Too Many Requests


Fetching entity names:  45% 12395/27759 [1:16:55<1:26:01,  2.98entity/s]

Error fetching Q731804: HTTP Error 429: Too Many Requests


Fetching entity names:  45% 12399/27759 [1:16:57<1:37:50,  2.62entity/s]

Error fetching Q5491446: HTTP Error 429: Too Many Requests


Fetching entity names:  45% 12404/27759 [1:16:58<1:31:07,  2.81entity/s]

Error fetching Q1191778: HTTP Error 429: Too Many Requests


Fetching entity names:  53% 14657/27759 [1:30:33<10:16:40,  2.82s/entity]

Error fetching Q1770885: <urlopen error [Errno -3] Temporary failure in name resolution>


Fetching entity names:  56% 15665/27759 [1:36:24<1:08:27,  2.94entity/s] 

Error fetching Q184116: HTTP Error 429: Too Many Requests


Fetching entity names:  56% 15675/27759 [1:36:28<1:07:48,  2.97entity/s]

Error fetching Q2072805: HTTP Error 429: Too Many Requests


Fetching entity names:  56% 15683/27759 [1:36:30<1:08:29,  2.94entity/s]

Error fetching Q190585: HTTP Error 429: Too Many Requests
Error fetching Q109711: HTTP Error 429: Too Many Requests


Fetching entity names:  57% 15688/27759 [1:36:32<1:07:57,  2.96entity/s]

Error fetching Q4570025: HTTP Error 429: Too Many Requests


Fetching entity names:  57% 15689/27759 [1:36:32<1:06:43,  3.02entity/s]

Error fetching Q6483471: HTTP Error 429: Too Many Requests


Fetching entity names:  57% 15691/27759 [1:36:33<1:04:39,  3.11entity/s]

Error fetching Q179359: HTTP Error 429: Too Many Requests


Fetching entity names:  57% 15699/27759 [1:36:36<1:05:39,  3.06entity/s]

Error fetching Q2865008: HTTP Error 429: Too Many Requests


Fetching entity names:  57% 15700/27759 [1:36:36<1:04:45,  3.10entity/s]

Error fetching Q196105: HTTP Error 429: Too Many Requests


Fetching entity names:  57% 15702/27759 [1:36:37<1:04:21,  3.12entity/s]

Error fetching Q7548048: HTTP Error 429: Too Many Requests


Fetching entity names:  57% 15709/27759 [1:36:39<1:07:07,  2.99entity/s]

Error fetching Q7893188: HTTP Error 429: Too Many Requests


Fetching entity names:  57% 15727/27759 [1:36:46<1:16:11,  2.63entity/s]

Error fetching Q7114751: HTTP Error 429: Too Many Requests


Fetching entity names:  57% 15749/27759 [1:36:53<1:05:16,  3.07entity/s]

Error fetching Q13437: HTTP Error 429: Too Many Requests


Fetching entity names:  57% 15751/27759 [1:36:54<1:13:52,  2.71entity/s]

Error fetching Q7989658: HTTP Error 429: Too Many Requests


Fetching entity names:  57% 15779/27759 [1:37:03<1:06:03,  3.02entity/s]

Error fetching Q16147495: HTTP Error 429: Too Many Requests


Fetching entity names:  57% 15780/27759 [1:37:03<1:04:41,  3.09entity/s]

Error fetching Q1540534: HTTP Error 429: Too Many Requests


Fetching entity names:  57% 15786/27759 [1:37:05<1:05:10,  3.06entity/s]

Error fetching Q2151592: HTTP Error 429: Too Many Requests


Fetching entity names:  57% 15796/27759 [1:37:09<1:08:47,  2.90entity/s]

Error fetching Q212757: HTTP Error 429: Too Many Requests


Fetching entity names:  57% 15800/27759 [1:37:10<1:05:29,  3.04entity/s]

Error fetching Q113631: HTTP Error 429: Too Many Requests


Fetching entity names:  57% 15801/27759 [1:37:10<1:04:58,  3.07entity/s]

Error fetching Q161140: HTTP Error 429: Too Many Requests


Fetching entity names:  57% 15807/27759 [1:37:12<1:12:06,  2.76entity/s]

Error fetching Q26495: HTTP Error 429: Too Many Requests


Fetching entity names:  69% 19084/27759 [1:56:55<51:38,  2.80entity/s]  

Error fetching Q14859: HTTP Error 429: Too Many Requests


Fetching entity names:  69% 19086/27759 [1:56:56<49:21,  2.93entity/s]

Error fetching Q7953134: HTTP Error 429: Too Many Requests


Fetching entity names:  69% 19089/27759 [1:56:57<47:04,  3.07entity/s]

Error fetching Q131874: HTTP Error 429: Too Many Requests


Fetching entity names:  75% 20794/27759 [2:06:56<42:06,  2.76entity/s]  

Error fetching Q4118977: HTTP Error 429: Too Many Requests


Fetching entity names:  81% 22469/27759 [2:16:53<29:30,  2.99entity/s]  

Error fetching Q48485: HTTP Error 429: Too Many Requests


Fetching entity names:  81% 22471/27759 [2:16:53<29:11,  3.02entity/s]

Error fetching Q5386829: HTTP Error 429: Too Many Requests


Fetching entity names:  81% 22472/27759 [2:16:54<28:41,  3.07entity/s]

Error fetching Q5178891: HTTP Error 429: Too Many Requests


Fetching entity names:  81% 22474/27759 [2:16:54<28:38,  3.08entity/s]

Error fetching Q6302565: HTTP Error 429: Too Many Requests


Fetching entity names:  81% 22481/27759 [2:16:57<28:29,  3.09entity/s]

Error fetching Q981734: HTTP Error 429: Too Many Requests
Error fetching Q257083: HTTP Error 429: Too Many Requests


Fetching entity names:  81% 22482/27759 [2:16:57<28:18,  3.11entity/s]

Error fetching Q49002: HTTP Error 429: Too Many Requests


Fetching entity names:  81% 22484/27759 [2:16:58<27:40,  3.18entity/s]

Error fetching Q2433285: HTTP Error 429: Too Many Requests


Fetching entity names:  81% 22487/27759 [2:16:59<33:19,  2.64entity/s]

Error fetching Q658447: HTTP Error 429: Too Many Requests


Fetching entity names:  81% 22491/27759 [2:17:01<43:06,  2.04entity/s]

Error fetching Q24963455: HTTP Error 429: Too Many Requests


Fetching entity names:  81% 22492/27759 [2:17:01<38:33,  2.28entity/s]

Error fetching Q847018: HTTP Error 429: Too Many Requests


Fetching entity names:  81% 22494/27759 [2:17:02<33:19,  2.63entity/s]

Error fetching Q821757: HTTP Error 429: Too Many Requests


Fetching entity names:  81% 22497/27759 [2:17:03<30:03,  2.92entity/s]

Error fetching Q2214113: HTTP Error 429: Too Many Requests


Fetching entity names:  81% 22500/27759 [2:17:04<28:57,  3.03entity/s]

Error fetching Q6747420: HTTP Error 429: Too Many Requests


Fetching entity names:  81% 22508/27759 [2:17:07<33:53,  2.58entity/s]

Error fetching Q967868: HTTP Error 429: Too Many Requests


Fetching entity names:  81% 22509/27759 [2:17:07<32:05,  2.73entity/s]

Error fetching Q18062568: HTTP Error 429: Too Many Requests


Fetching entity names:  81% 22528/27759 [2:17:14<29:20,  2.97entity/s]

Error fetching Q16558: HTTP Error 429: Too Many Requests


Fetching entity names:  81% 22537/27759 [2:17:17<28:23,  3.07entity/s]

Error fetching Q1894432: HTTP Error 429: Too Many Requests


Fetching entity names:  81% 22541/27759 [2:17:26<2:53:22,  1.99s/entity]

Error fetching Q7545592: HTTP Error 429: Too Many Requests


Fetching entity names:  81% 22545/27759 [2:17:27<1:03:50,  1.36entity/s]

Error fetching Q2624171: HTTP Error 429: Too Many Requests


Fetching entity names:  81% 22546/27759 [2:17:27<52:56,  1.64entity/s]  

Error fetching Q7954972: HTTP Error 429: Too Many Requests


Fetching entity names:  81% 22554/27759 [2:17:30<29:53,  2.90entity/s]

Error fetching Q39450: HTTP Error 429: Too Many Requests


Fetching entity names:  81% 22556/27759 [2:17:31<28:57,  2.99entity/s]

Error fetching Q2581445: HTTP Error 429: Too Many Requests


Fetching entity names:  81% 22564/27759 [2:17:33<28:09,  3.07entity/s]

Error fetching Q607212: HTTP Error 429: Too Many Requests


Fetching entity names:  81% 22566/27759 [2:17:34<27:55,  3.10entity/s]

Error fetching Q2734098: HTTP Error 429: Too Many Requests


Fetching entity names:  81% 22576/27759 [2:17:37<29:47,  2.90entity/s]

Error fetching Q212105: HTTP Error 429: Too Many Requests


Fetching entity names:  87% 24181/27759 [2:26:54<21:09,  2.82entity/s]  

Error fetching Q16186061: HTTP Error 429: Too Many Requests


Fetching entity names:  87% 24184/27759 [2:26:55<21:28,  2.77entity/s]

Error fetching Q5434900: HTTP Error 429: Too Many Requests


Fetching entity names:  87% 24190/27759 [2:26:57<20:34,  2.89entity/s]

Error fetching Q53340: HTTP Error 429: Too Many Requests


Fetching entity names:  87% 24194/27759 [2:26:59<19:53,  2.99entity/s]

Error fetching Q7745728: HTTP Error 429: Too Many Requests


Fetching entity names:  87% 24196/27759 [2:26:59<19:29,  3.05entity/s]

Error fetching Q520665: HTTP Error 429: Too Many Requests


Fetching entity names:  87% 24202/27759 [2:27:01<20:29,  2.89entity/s]

Error fetching Q37629: HTTP Error 429: Too Many Requests
Error fetching Q806215: HTTP Error 429: Too Many Requests


Fetching entity names:  87% 24215/27759 [2:27:06<19:55,  2.97entity/s]

Error fetching Q7855618: HTTP Error 429: Too Many Requests


Fetching entity names:  87% 24229/27759 [2:27:11<19:16,  3.05entity/s]

Error fetching Q16980469: HTTP Error 429: Too Many Requests


Fetching entity names:  87% 24233/27759 [2:27:12<19:13,  3.06entity/s]

Error fetching Q506428: HTTP Error 429: Too Many Requests


Fetching entity names:  87% 24234/27759 [2:27:12<18:58,  3.10entity/s]

Error fetching Q5038835: HTTP Error 429: Too Many Requests


Fetching entity names:  87% 24244/27759 [2:27:15<19:35,  2.99entity/s]

Error fetching Q54238: HTTP Error 429: Too Many Requests


Fetching entity names:  87% 24245/27759 [2:27:16<19:13,  3.05entity/s]

Error fetching Q3186931: HTTP Error 429: Too Many Requests


Fetching entity names:  87% 24247/27759 [2:27:17<22:28,  2.60entity/s]

Error fetching Q206237: HTTP Error 429: Too Many Requests


Fetching entity names:  87% 24262/27759 [2:27:22<20:04,  2.90entity/s]

Error fetching Q1535516: HTTP Error 429: Too Many Requests


Fetching entity names:  87% 24265/27759 [2:27:23<19:26,  3.00entity/s]

Error fetching Q983540: HTTP Error 429: Too Many Requests


Fetching entity names:  87% 24273/27759 [2:27:25<18:58,  3.06entity/s]

Error fetching Q1338800: HTTP Error 429: Too Many Requests


Fetching entity names:  88% 24301/27759 [2:27:35<18:55,  3.05entity/s]

Error fetching Q18637970: HTTP Error 429: Too Many Requests


Fetching entity names:  88% 24308/27759 [2:27:37<19:10,  3.00entity/s]

Error fetching Q4731296: HTTP Error 429: Too Many Requests


Fetching entity names:  93% 25905/27759 [2:36:52<10:10,  3.04entity/s]

Error fetching Q3154526: HTTP Error 429: Too Many Requests


Fetching entity names:  93% 25913/27759 [2:36:55<11:29,  2.68entity/s]

Error fetching Q3715188: HTTP Error 429: Too Many Requests


Fetching entity names:  93% 25922/27759 [2:36:58<10:05,  3.03entity/s]

Error fetching Q213205: HTTP Error 429: Too Many Requests


Fetching entity names:  93% 25926/27759 [2:36:59<10:04,  3.03entity/s]

Error fetching Q7589619: HTTP Error 429: Too Many Requests


Fetching entity names:  93% 25931/27759 [2:37:01<09:55,  3.07entity/s]

Error fetching Q788975: HTTP Error 429: Too Many Requests


Fetching entity names:  93% 25943/27759 [2:37:05<10:12,  2.97entity/s]

Error fetching Q6830527: HTTP Error 429: Too Many Requests


Fetching entity names:  93% 25948/27759 [2:37:07<10:48,  2.79entity/s]

Error fetching Q506921: HTTP Error 429: Too Many Requests


Fetching entity names:  93% 25952/27759 [2:37:08<10:03,  3.00entity/s]

Error fetching Q6505: HTTP Error 429: Too Many Requests


Fetching entity names:  94% 25956/27759 [2:37:09<09:52,  3.04entity/s]

Error fetching Q755075: HTTP Error 429: Too Many Requests


Fetching entity names:  94% 25960/27759 [2:37:11<09:58,  3.01entity/s]

Error fetching Q14950689: HTTP Error 429: Too Many Requests


Fetching entity names:  94% 25967/27759 [2:37:13<10:05,  2.96entity/s]

Error fetching Q7981065: HTTP Error 429: Too Many Requests


Fetching entity names:  94% 25974/27759 [2:37:15<10:08,  2.93entity/s]

Error fetching Q13973: HTTP Error 429: Too Many Requests


Fetching entity names: 100% 27759/27759 [2:48:05<00:00,  2.75entity/s]
