**Authentication**

In [1]:
from google.colab import auth
import gspread
from google.auth import default

# Authenticate this Colab session with the user's Google account
auth.authenticate_user()

# google.auth.default() returns a (credentials, project_id) pair. The project
# id is not used here; it is bound to a named throwaway instead of `_` so that
# IPython's `_` (the last cell output) is not shadowed for the rest of the
# notebook session.
creds, _project_id = default()

# gspread client, used by the following cell to open the spreadsheet
gc = gspread.authorize(creds)

**Import Google Sheet**

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer

# Name of the spreadsheet to analyse
SHEET_NAME = "Google Sheets File Name"  # insert Google Sheet file name

# Open the spreadsheet by name and take its first worksheet (tab)
worksheet = gc.open(SHEET_NAME).sheet1

# get_all_values gives a list of rows
rows = worksheet.get_all_values()
if not rows:
    # Without this guard the header lookup below fails with an opaque error
    raise ValueError(f"Worksheet in '{SHEET_NAME}' is empty: no header row found")

# The first row holds the column names, the remaining rows are the data.
# Building the frame in one step (rather than promoting row 0 of a header-less
# frame and slicing it off) keeps `search_terms` a stand-alone DataFrame with a
# clean 0-based index, so later column assignments do not act on a slice.
header, *records = rows
search_terms = pd.DataFrame(records, columns=header)

search_terms.head()

**Stem and discover n-grams**

In [None]:
import nltk
# Download NLTK data packages at runtime. Presumably these are the tokenizer
# resources that word_tokenize relies on ('punkt' for older NLTK releases,
# 'punkt_tab' for newer ones) — TODO confirm against the installed version.
nltk.download('punkt')
nltk.download('punkt_tab')
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
# NOTE(review): pandas is already imported in the "Import Google Sheet" cell;
# the repeated import is harmless but redundant.
import pandas as pd

# Single Porter stemmer instance shared by every call to stem_text
stemmer = PorterStemmer()


def stem_text(text):
    """Return `text` with every token replaced by its Porter stem.

    The text is split into tokens with NLTK's `word_tokenize`, each token is
    passed through `stemmer.stem`, and the resulting stems are joined back
    together with single spaces.
    """
    stems = (stemmer.stem(token) for token in word_tokenize(text))
    return ' '.join(stems)

# numpy is only needed in this cell, to flatten the sparse column totals
import numpy as np

# Sheet column that holds the raw search terms
TERM_COLUMN = 'Search term'
if TERM_COLUMN not in search_terms.columns:
    raise KeyError(
        f"Column {TERM_COLUMN!r} not found; available columns: "
        f"{list(search_terms.columns)}"
    )

# Apply stemming to the search terms.
# The stems are kept in their own Series instead of overwriting the sheet
# column: overwriting made this cell non-idempotent, because re-running it
# would tokenize and stem text that had already been stemmed.
stemmed_terms = search_terms[TERM_COLUMN].apply(stem_text)

# Define the n-gram range to include both bigrams and trigrams
ngram_range = (2, 3)

# Initialize the CountVectorizer
# NOTE(review): the built-in English stop-word list is applied to text that
# has already been stemmed, so stop words whose stem differs from the listed
# form may not be filtered out — confirm whether that is acceptable.
vectorizer = CountVectorizer(ngram_range=ngram_range, stop_words='english')

# Fit and transform the data; X is a sparse document-term count matrix
X = vectorizer.fit_transform(stemmed_terms)
feature_names = vectorizer.get_feature_names_out()

# Per-document n-gram counts as a DataFrame for easier analysis. The frame is
# sparse-backed: materialising X with toarray() allocates
# documents x n-grams dense cells, which can exhaust memory on large sheets.
ngram_df = pd.DataFrame.sparse.from_spmatrix(X, columns=feature_names)

# Sum the occurrences of each n-gram directly on the sparse matrix
ngram_counts = pd.Series(
    np.asarray(X.sum(axis=0)).ravel(),
    index=feature_names,
).sort_values(ascending=False)

print(ngram_counts)


**Top 15 n-grams**

In [None]:
import matplotlib.pyplot as plt

# Take the 15 most frequent n-grams, then sort ascending so the largest bar
# ends up at the top of the horizontal bar chart
top_ngrams = ngram_counts.head(15).sort_values()

# pandas returns the Axes it drew on; label it directly
ax = top_ngrams.plot.barh(figsize=(10, 5))
ax.set_title('Top 15 n-grams')
ax.set_xlabel('Frequency')
ax.set_ylabel('n-grams')
plt.show()

**Export all to CSV**

In [8]:
# Export the n-gram totals to a CSV file and download it through Colab
from google.colab import files

# Two-column table: the n-gram text ('n-gram') and its total count ('sum')
ngram_data = ngram_counts.rename_axis('n-gram').reset_index(name='sum')

csv_name = 'n-grams.csv'
ngram_data.to_csv(csv_name, index=False)

# Hand the file written above to the browser as a download
files.download(csv_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>