In [7]:
from googlesearch import search
from newspaper import Article
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import numpy as np
import pandas as pd

import openai

from bokeh.plotting import figure, show, output_file, save
from bokeh.models import ColumnDataSource, HoverTool, CustomJS, Div
from bokeh.layouts import row




In [2]:
sentiment_list = []

# to search
query = "Mito spreadsheet"

search_results = search(query, tld="co.in", num=100, stop=100, pause=2)

for j in search_results:
    try:
        article = Article(j)
        article.download()
        article.parse()
        mitotext = article.text
        sentimentanalyzer = SentimentIntensityAnalyzer()
        sentiment = sentimentanalyzer.polarity_scores(mitotext)
        sentiment_list.append([j, sentiment, mitotext])
    except:
        print("failed to parse: " + str(j))

print(sentiment_list)

failed to parse: https://www.linkedin.com/posts/jacob-diamond-reivich-03ab62145_14-mito-spreadsheet-automation-with-python-activity-7284473222845087744-ZTfU
failed to parse: https://gran-turismo.fandom.com/wiki/Gran_Turismo_7/Car_List


In [17]:

# Extract compound values and URLs
compounds = [entry[1]['compound'] for entry in sentiment_list]
urls = [entry[0] for entry in sentiment_list]

# Define bins
bins = np.linspace(-1, 1, 11)
bin_labels = [f"{round(bins[i], 2)} to {round(bins[i+1], 2)}" for i in range(len(bins)-1)]

# Assign each URL to a bin
df = pd.DataFrame({'url': urls, 'compound': compounds})
df['bin'] = pd.cut(df['compound'], bins=bins, labels=bin_labels, include_lowest=True)

# Count URLs per bin
bin_counts = df['bin'].value_counts().reindex(bin_labels, fill_value=0)

# Group URLs by bin as HTML links
url_map = df.groupby('bin')['url'].apply(
    lambda x: '<br>'.join([f'<a href="{url}" target="_blank">{url}</a>' for url in x])
).reindex(bin_labels, fill_value='No URLs in this bin')

# Data source for the bar chart
source = ColumnDataSource(data=dict(
    x=bin_labels,
    y=bin_counts.values,
    urls=url_map.values
))

# Sidebar (initially empty)
sidebar = Div(
    text="<b>Click a bar to see URLs here</b>", 
    width=400, 
    height=600, 
    styles={
        'overflow-y': 'auto', 
        'border': '1px solid black', 
        'padding': '10px'
    }
)

# Create bar chart
p = figure(
    x_range=bin_labels,
    height=600,
    width=800,
    title="Distribution of Sentiments of Webpages Mentioning Mito Spreadsheets - Click a bar to See URLs",
    tools="tap,pan,wheel_zoom,box_zoom,reset,save",
    toolbar_location="above",
    x_axis_label="Sentiment (Negative < -0.05 < Neutral  < 0.05 < Positive )",
    y_axis_label="Count of Webpages"
)

# Plot bars and enable selection
bars = p.vbar(
    x='x', 
    top='y', 
    width=0.9, 
    source=source, 
    color='#ac84fc', 
    line_color='black', 
    selection_color='#9d6cff'  # Highlight when clicked
)

# Hover effect
hover = HoverTool(
    tooltips=[("Count", "@y")],
    mode="vline"
)
p.add_tools(hover)

# Sidebar update on click using CustomJS
bars.data_source.selected.js_on_change("indices", CustomJS(args=dict(source=source, sidebar=sidebar), code="""
    var selected = source.selected.indices[0];
    if (selected !== undefined) {
        var urls = source.data['urls'][selected];
        sidebar.text = urls;  // Update sidebar with URLs
    } else {
        sidebar.text = "<b>Click a bar to see URLs here</b>";
    }
"""))

# Aesthetics
p.xgrid.grid_line_color = None
p.y_range.start = 0
p.xaxis.major_label_orientation = 1.2

# Layout: Chart + Sidebar
layout = row(p, sidebar)

# Save and show
output_file("../figures/SentimentsGraph.html")
show(layout)



  url_map = df.groupby('bin')['url'].apply(


In [18]:

# Step 1: Group URLs by Sentiment
sentiment_bins = {"Positive": [], "Negative": [], "Neutral": []}

for url, sentiment_dict, text in sentiment_list:
    if sentiment_dict['compound'] >= 0.05:
        sentiment_bins["Positive"].append(url)
    elif sentiment_dict['compound'] <= -0.05:
        sentiment_bins["Negative"].append(url)
    else:
        sentiment_bins["Neutral"].append(url)

# Step 2: Connect to OpenAI and Analyze Popular Words
client = openai.Client(api_key="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")

def get_popular_words(sentiment_name, urls):
    """Ask OpenAI to find popular words from URLs."""
    prompt = (
        f"Here are some websites with {sentiment_name} sentiment: {', '.join(urls)}. "
        "What are 10 popular phrases found in these websites that may explain this sentiment? Only list phrases that have to do with the Mito Spreadsheet/ Mito AI product. Make sure to select quotes from a range of these websites and not just a couple. List the words with bullet points but don't inlude the url."
    )
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7
    )
    return response.choices[0].message.content.strip()

# Step 3: Loop through each sentiment and print the results
for sentiment, urls in sentiment_bins.items():
    if urls:
        words = get_popular_words(sentiment, urls)
        print(f"\nPopular phrases for {sentiment} sentiment:\n{words}")
    else:
        print(f"\nNo URLs for {sentiment} sentiment.")



Popular phrases for Positive sentiment:
- "Mito is a powerful spreadsheet for data scientists"
- "Revolutionize the way you work with data"
- "Automate your data tasks with Mito"
- "Turn spreadsheets into Python"
- "Generate production-ready Python code"
- "Create elegant data apps"
- "Manipulate your data like spreadsheets"
- "Spreadsheet automation with Mito"
- "Data analysis with Mito"
- "The spreadsheet of the future"

No URLs for Negative sentiment.

Popular phrases for Neutral sentiment:
- "Mito is a spreadsheet that's also an AI" (https://www.reddit.com/r/Python/comments/m0bj5g/mito_write_python_10x_faster_by_editing_a/)
- "Mito is a Python tool that allows users to write code efficiently" (https://www.youtube.com/watch?v=kM_FMkkRWR8)
- "Mito is a new Python tool" (https://www.youtube.com/watch?v=Ox94C8-ebwQ)
- "Try Mito to write Python 10x faster" (https://x.com/trymito?lang=en)
- "Mito is a great Python tool for data analysis" (https://www.youtube.com/watch?v=9LuZWQgHJ5Q)
- "