## Sentiments and topics

In [1]:
import pandas as pd
topics_df=pd.read_csv('../data/df_with_topics.csv')
sentiments_df=pd.read_csv('../data/final_df_sentiments.csv')

In [2]:
import datetime
topics_df.web_publication_date=pd.to_datetime(topics_df.web_publication_date)

In [3]:
sent_topics=pd.merge(topics_df, sentiments_df, how='left', on=["Unnamed: 0",'article_nr', 'sentence', 'wordcount', 'section_name',
        'headline', 'web_title', 'production_office', 
       'publication'])

In [4]:
sent_topics=sent_topics.drop(columns=['web_publication_date_y'])

In [5]:
#Getting a dataframe with only the relevant topics and years
relevant_topics=[3, 13, 18, 22, 24, 25, 27, 29, 32, 33, 40, 42]
relevant_topics_df=sent_topics[sent_topics.topic.isin(relevant_topics)]
relevant_topics_df=relevant_topics_df[relevant_topics_df.web_publication_date_x > '2013']
relevant_topics_df=relevant_topics_df.rename(columns={'web_publication_date_x':'web_publication_date', 'label':'sentiment', 'score':'sentiment_score', 'Unnamed: 0':'sentence_nr'})

#Getting a column with the negative of the sentiment score for negative sentiments
relevant_topics_df["sentiment_score_1"]=relevant_topics_df.apply(lambda x: x.sentiment_score*(-1) if x.sentiment=="NEGATIVE" else x.sentiment_score, axis=1)

#Getting a year column
import datetime
relevant_topics_df["year"]=pd.to_datetime(topics_df.web_publication_date).dt.year

In [6]:
#Saving the dataframe
relevant_topics_df.to_csv('relevant_topics_df.csv')

In [7]:
# grouping by topic and publication office and getting percentages
temp=relevant_topics_df.groupby(["production_office","topic", "Name"]).count().groupby(level=0).apply(lambda x: 100 * x / x.sum()).reset_index()
temp=temp.sort_values(by='topic', ascending=False)

In [8]:
import plotly.express as px
#Horizontal bar chart
fig = px.bar(temp, y="Name", x="sentence_nr", facet_col="production_office", facet_col_wrap=3,orientation='h', 
        labels=dict(Name="Topics", sentence_nr="Percentage of Topics"),
                title="Percentage of Topics per Country.<br><sup> </sup><br><br>",
                height= 500, width=1000)
                #x=['0','10', '20', '30', '40', '50', '60','70'],
                #y=['0','10', '20', '30', '40', '50', '60','70'])

fig.write_image("outputs/percentage_topics_country.png")
fig.show()

In [9]:
import numpy as np
topics_office=relevant_topics_df.groupby(["production_office","topic", "Name"]).count().reset_index().rename(columns={'sentence_nr':'number_topics'})
#groupby(level=0).apply(lambda x: 100 * x / x.sum()).reset_index()
topics_office=topics_office.sort_values(by='topic')

weighted_count_topics=lambda x: np.average(x.number_topics, weights=x.Count)
average_topics_office=topics_office.groupby(["production_office", "topic", "Name"]).apply(weighted_count_topics).reset_index().rename(columns={0:"weighted_count_topics"})
#average_dem_year=pd.merge(average_dem_year,vdem_agg_cost, how="left")
average_topics_office=average_topics_office.sort_values(by='topic', ascending=False)
average_topics_office.head(5)


Unnamed: 0,production_office,topic,Name,weighted_count_topics
35,US,42,42_machines_machine_humans_human,71.0
11,AUS,42,42_machines_machine_humans_human,40.0
23,UK,42,42_machines_machine_humans_human,506.0
34,US,40,40_language_word_words_translation,46.0
10,AUS,40,40_language_word_words_translation,55.0


In [10]:
import plotly.express as px
#Horizontal bar chart
fig = px.bar(average_topics_office, y="Name", x="weighted_count_topics", facet_col="production_office", facet_col_wrap=3,orientation='h', 
        #template="ggplot2",
        labels=dict(Name="Topics", weighted_count_topics="Weighted Count"),
                title="Weighted Count of Topics per Country.<br><sup> </sup><br><br>",
                height= 500, width=1000)
                #x=['0','10', '20', '30', '40', '50', '60','70'],
                #y=['0','10', '20', '30', '40', '50', '60','70'])
fig.write_image("outputs/Weighted_topics_country.png")
fig.show()

In [11]:
# grouping by topic and publication office and getting average sentiment score
sentiments_topics=relevant_topics_df.groupby(["production_office","topic", "Name"])["sentiment_score_1"].mean().reset_index()
#relevant_topics_df.groupby(["production_office", "Name", "sentiment"]).count().groupby(level=0).apply(lambda x: 100 * x / x.sum()).reset_index()
sentiments_topics=sentiments_topics.sort_values(by='topic', ascending=False)

In [12]:
import plotly.express as px
#Horizontal bar chart
fig = px.bar(sentiments_topics, y="Name", x="sentiment_score_1", facet_col="production_office", facet_col_wrap=3,orientation='h', 
        #template="ggplot2", 
        labels=dict(Name="Topics", sentiment_score_1="Sentiment score"),
                title="Sentiments per topic and country.",
                height= 500, width=1000)
fig.write_image("outputs/sentiment_per_topic.png")
fig.show()

In [13]:
# grouping by topic and publication office and getting average sentiment score
sentiments_topics_year=relevant_topics_df.groupby(["year","production_office"])["sentiment_score_1"].mean().reset_index()
#relevant_topics_df.groupby(["production_office", "Name", "sentiment"]).count().groupby(level=0).apply(lambda x: 100 * x / x.sum()).reset_index()


In [16]:
import plotly.express as px
#Horizontal bar chart
fig = px.bar(sentiments_topics_year, x="year", y="sentiment_score_1", facet_col="production_office", facet_col_wrap=3, 
        labels=dict(year="Year", sentiment_score_1="Sentiment Score"),
                title="Sentiment score of sentences between 2013 and 2022 per Country.")
fig.write_image("outputs/sentiment_per_year.png")
fig.show()

In [17]:
representative_sentences=pd.read_csv("data/representative_docs.csv")

In [18]:
representative_sentences

Unnamed: 0,topic,representative_sentence
0,3,Even if experts solve the engineering problem ...
1,3,“This type of oversight makes me worried that ...
2,3,"Neither, says Lanier: artificial intelligence ..."
3,3,"From this amazing text, Russell pulled one dra..."
4,3,Russell believes that our current approach to ...
...,...,...
514,40,Still playable?
515,40,"Pretty good actually, and certainly playable e..."
516,42,"Hey, come on, they say, we’re doing everything..."
517,42,I have argued that we should be more timid and...


In [39]:
representative_sentences.representative_sentence[80]

'What I’m reading Gee whizHow quickly will the switch to electric cars – made by Elon Musk (below) and his ilk – happen?'

In [38]:
print(representative_sentences[representative_sentences.topic==3])

     topic                            representative_sentence
0        3  Even if experts solve the engineering problem ...
1        3  “This type of oversight makes me worried that ...
2        3  Neither, says Lanier: artificial intelligence ...
3        3  From this amazing text, Russell pulled one dra...
4        3  Russell believes that our current approach to ...
..     ...                                                ...
154      3  Really though, this is already happening in vi...
155      3  Experimental indie titles like Prom Week and F...
156      3  But there's not a scintilla of doubt in Kurzwe...
157      3             But Kurzweil is anything but ordinary.
158      3  But then, it's Kurzweil's single-mindedness th...

[159 rows x 2 columns]


In [123]:
representative_sentences.representative_sentence.unique().size

511

In [24]:
relevant_topics_df.representative_sentence[1]

AttributeError: 'DataFrame' object has no attribute 'representative_sentence'

In [None]:
'Even if experts solve the engineering problem of aligning AI with select human values, further political concerns remain.'

In [26]:
relevant_sentiment=relevant_topics_df[(relevant_topics_df.sentence.isin(representative_sentences.representative_sentence))]

In [35]:
relevant_topics_df[(relevant_topics_df.sentence=='First, you have to deal with the question: “Ethics??')]

Unnamed: 0,sentence_nr,article_nr,sentence,wordcount,section_name,web_publication_date,headline,web_title,production_office,publication,topic,Topic,Count,Name,sentiment,sentiment_score,sentiment_score_1,year
264099,111751,2614,"First, you have to deal with the question: “Et...",898,Opinion,2018-10-28 05:00:09+00:00,Has Apple finally given its super-fast iPhone ...,Has Apple finally given its super-fast iPhone ...,UK,The Observer,3,3,3497,3_ai_intelligence_artificial_artificial intell...,NEGATIVE,0.992743,-0.992743,2018


In [149]:
relevant_sentiment.sentence.unique().size

417

In [76]:
tem=relevant_sentiment[(relevant_sentiment.topic==32) & (relevant_sentiment.production_office=="AUS")] 

In [77]:
tem

Unnamed: 0,sentence_nr,article_nr,sentence,wordcount,section_name,web_publication_date,headline,web_title,production_office,publication,topic,Topic,Count,Name,sentiment,sentiment_score,sentiment_score_1,year
290419,11022,286,Related: Why is Facebook shutting down its fac...,695,World news,2021-11-03 07:02:35+00:00,Facial recognition firm Clearview AI to appeal...,Facial recognition firm Clearview AI to appeal...,AUS,theguardian.com,32,32,910,32_facial_facial recognition_recognition_faces,NEGATIVE,0.999231,-0.999231,2021
290430,11037,286,Facebook this week also announced a move away ...,695,World news,2021-11-03 07:02:35+00:00,Facial recognition firm Clearview AI to appeal...,Facial recognition firm Clearview AI to appeal...,AUS,theguardian.com,32,32,910,32_facial_facial recognition_recognition_faces,NEGATIVE,0.991553,-0.991553,2021
290917,80200,1938,Perth City council is pressing ahead with a t...,808,Technology,2019-06-11 19:25:18+00:00,Perth council facial recognition trial greeted...,Perth council facial recognition trial greeted...,AUS,theguardian.com,32,32,910,32_facial_facial recognition_recognition_faces,POSITIVE,0.990467,0.990467,2019


In [87]:
print(tem2.sentence[290363])

Problems with gender and racial biases have been well documented in other AI-based technology such as facial recognition, and they could also exist in these types of systems, says Vicente Ordóñez-Roman, a computer vision expert at the University of Virginia.


In [85]:
tem2=relevant_topics_df[(relevant_topics_df.topic==32) & (relevant_topics_df.production_office=="US")] 

In [86]:
tem2

Unnamed: 0,sentence_nr,article_nr,sentence,wordcount,section_name,web_publication_date,headline,web_title,production_office,publication,topic,Topic,Count,Name,sentiment,sentiment_score,sentiment_score_1,year
290363,3513,86,Problems with gender and racial biases have be...,1778,US news,2021-06-03 08:00:07+00:00,The future of elder care is here – and it’s ar...,The future of elder care is here – and it’s ar...,US,The Guardian,32,32,910,32_facial_facial recognition_recognition_faces,NEGATIVE,0.995887,-0.995887,2021
290390,8517,220,Problems with gender and racial bias have been...,1858,Technology,2022-04-27 06:30:23+00:00,‘Bossware is coming for almost every worker’: ...,‘Bossware is coming for almost every worker’: ...,US,theguardian.com,32,32,910,32_facial_facial recognition_recognition_faces,NEGATIVE,0.998695,-0.998695,2022
290392,9768,253,And a notable instance when the software was u...,2004,US news,2022-08-26 07:00:26+00:00,‘I’m afraid’: critics of anti-cheating technol...,‘I’m afraid’: critics of anti-cheating technol...,US,theguardian.com,32,32,910,32_facial_facial recognition_recognition_faces,NEGATIVE,0.999459,-0.999459,2022
290393,9809,253,"But despite her best efforts, she couldn’t get...",2004,US news,2022-08-26 07:00:26+00:00,‘I’m afraid’: critics of anti-cheating technol...,‘I’m afraid’: critics of anti-cheating technol...,US,theguardian.com,32,32,910,32_facial_facial recognition_recognition_faces,NEGATIVE,0.999478,-0.999478,2022
290394,9830,253,“We recognize some face detection and gaze det...,2004,US news,2022-08-26 07:00:26+00:00,‘I’m afraid’: critics of anti-cheating technol...,‘I’m afraid’: critics of anti-cheating technol...,US,theguardian.com,32,32,910,32_facial_facial recognition_recognition_faces,POSITIVE,0.963813,0.963813,2022
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
291228,184910,4202,What is facial recognition?,1347,Technology,2016-01-12 08:00:49+00:00,Shops could soon be targeting ads according to...,Shops could soon be targeting ads according to...,US,theguardian.com,32,32,910,32_facial_facial recognition_recognition_faces,POSITIVE,0.950038,0.950038,2016
291229,184911,4202,It is technology that can identify people by a...,1347,Technology,2016-01-12 08:00:49+00:00,Shops could soon be targeting ads according to...,Shops could soon be targeting ads according to...,US,theguardian.com,32,32,910,32_facial_facial recognition_recognition_faces,POSITIVE,0.997092,0.997092,2016
291230,184929,4202,"Owen McCormack, Hoxton Analytics CEO, tells me...",1347,Technology,2016-01-12 08:00:49+00:00,Shops could soon be targeting ads according to...,Shops could soon be targeting ads according to...,US,theguardian.com,32,32,910,32_facial_facial recognition_recognition_faces,POSITIVE,0.993983,0.993983,2016
291238,203888,4536,He went on to say that problems in image recog...,441,Technology,2015-07-01 15:52:41+00:00,Google says sorry for racist auto-tag in photo...,Google says sorry for racist auto-tag in photo...,US,theguardian.com,32,32,910,32_facial_facial recognition_recognition_faces,NEGATIVE,0.996828,-0.996828,2015
