# The Popularity of Prompt Engineering Methods

This series of notebooks produces statistics on Semantic Scholar citations per day for all of the prompt engineering approaches listed at "https://www.promptingguide.ai/papers" and "https://en.wikipedia.org/wiki/Prompt_engineering#Text-to-text", and in the citations section of "The Practicality of Prompt Engineering".

This file loads in information on papers not initially found and then completes processing of the citation statistics.

In [None]:
# Print current datetime/run as of date
# The module is imported under an alias on purpose: the imports cell binds the
# bare name `datetime` to the class (`from datetime import datetime`), so a
# plain `import datetime` here would flip that name back to the module whenever
# this cell is re-run, breaking any code that expects `datetime` to be the class.
import datetime as dt
print(dt.datetime.now())


In [None]:
# Imports
import pandas as pd
from datetime import datetime
import numpy as np


In [None]:
# Read sheet "Sheet1" of the workbook "Semantic Scholar Citations.xlsx"
# into a DataFrame
semantic_scholar_df = pd.read_excel(
    io='Semantic Scholar Citations.xlsx',
    sheet_name='Sheet1',
)

# Leave the frame as the cell's last expression so the notebook renders it
semantic_scholar_df


In [None]:
# Add days between publication and today as a column to semantic_scholar_df

# Convert publication date to pandas datetime values
semantic_scholar_df["ss_publication_date"] = pd.to_datetime(semantic_scholar_df["ss_publication_date"])

print(semantic_scholar_df["ss_publication_date"])

# Column for end_date of today (the same run timestamp on every row).
# pd.Timestamp.today() is used rather than datetime.today() because the bare
# name `datetime` is bound to the module in one cell of this notebook and to
# the class in another; depending on execution order, `datetime.today()` can
# raise AttributeError. The pandas call does not depend on that binding.
semantic_scholar_df["end_date"] = pd.Timestamp.today()

# Calculate days between publication and end_date.
# Dividing by a one-day timedelta yields fractional days as floats.
semantic_scholar_df["days_from_pub_to_end_date"] = (
    semantic_scholar_df["end_date"] - semantic_scholar_df["ss_publication_date"]
) / np.timedelta64(1, "D")

semantic_scholar_df


In [None]:
# Add a column for citations per day.
# A non-positive elapsed time (publication date equal to or later than the run
# timestamp) would yield an infinite or negative rate and distort any ranking
# on this column, so such denominators are masked to NaN before dividing.
semantic_scholar_df["citations_per_day"] = (
    semantic_scholar_df["citation_count"]
    / semantic_scholar_df["days_from_pub_to_end_date"].where(lambda days: days > 0)
)

semantic_scholar_df


In [None]:
# Check for duplicates on 'semantic scholar title'

# Keep the first row seen for each title, then report how many rows remain
no_duplicates_df = semantic_scholar_df.drop_duplicates(subset='semantic scholar title', keep='first')

print(no_duplicates_df.shape[0])


In [None]:
# Show every row whose 'semantic scholar title' occurs more than once.
# keep=False flags all members of a duplicate group, not just the repeats;
# sorting by title places the members of each group next to each other.
duplicates_df = (
    semantic_scholar_df
    .loc[semantic_scholar_df.duplicated(subset=['semantic scholar title'], keep=False)]
    .sort_values(by='semantic scholar title')
)

duplicates_df


In [None]:
# Trim whitespace in 'paper title' column, then drop duplicated rows.
# Trimming comes first so that rows differing only by stray leading/trailing
# whitespace in the title are recognised as duplicates.
# .assign() builds a new frame instead of writing a column into the result of
# drop_duplicates(), which can trigger SettingWithCopyWarning on pandas
# versions without copy-on-write.
semantic_scholar_df = (
    semantic_scholar_df
    .assign(**{'paper title': semantic_scholar_df['paper title'].str.strip()})
    .drop_duplicates()
)


In [None]:
# Add back on missing_semantic_scholar_title_df
# NOTE(review): no cell visible in this notebook defines
# missing_semantic_scholar_title_df, so a fresh-kernel "Run All" stops here.
# The check below fails fast with an actionable message; the cell that builds
# the frame still needs to be restored above this one.
try:
    missing_semantic_scholar_title_df
except NameError as exc:
    raise NameError(
        "missing_semantic_scholar_title_df is not defined; create or load it "
        "in an earlier cell before appending it to semantic_scholar_df"
    ) from exc

# ignore_index=True renumbers the combined rows from 0
semantic_scholar_df = pd.concat([semantic_scholar_df, missing_semantic_scholar_title_df], ignore_index=True)


In [None]:
# Rank the rows from highest to lowest citations per day
semantic_scholar_df = semantic_scholar_df.sort_values("citations_per_day", ascending=False)

# Write the ranked table to an Excel workbook, omitting the index column
semantic_scholar_df.to_excel(excel_writer="Semantic Scholar Citations Per Day.xlsx", index=False)
