In [2]:
import pandas as pd
import os
import warnings
import json
warnings.filterwarnings('ignore')

In [3]:
# Load the AGCOM dataset from ../docs/data, building the path portably.
data = pd.read_parquet(os.path.join("..", "docs", "data", "agcomdata.parquet"))

In [4]:
# Show the column labels of the loaded frame.
data.columns

Index(['channel', 'program', 'day', 'lastname', 'name', 'affiliation', 'topic',
       'duration', 'kind'],
      dtype='object')

# Description
<table>
<tr>
<th>columns</th>
<th>description</th>
<th>content</th>
</tr>
<tr>
<td>channel</td><td>channel name</td><td>category</td>
</tr>
<tr>
<td>program</td><td>name of the program</td><td>category</td>
</tr>
<tr>
<td>day</td><td>day of the event</td><td>date stored as datetime64[ns] (displayed as yyyy-mm-dd)</td>
</tr>
<tr>
<td>name</td><td>first name of the person concerned, or the literal "political movement"</td><td>text - if the value is "political movement", the row refers to a political movement whose name is stored in the field lastname</td>
</tr>
<tr>
<td>lastname</td><td>surname of the person concerned or political movement</td><td>text, if name=="political movement" this field contains the name of the movement</td>
</tr>
<tr>
<td>topic</td><td>main topic of the discussion</td><td>category</td>
</tr>
<tr>
<td>affiliation</td><td>political party or institution the person or movement is associated with</td><td>category</td>
</tr>
<tr>
<td>duration</td><td>duration of the intervention in minutes</td><td>integer</td>
</tr>
<tr>
<td>kind</td><td>type of intervention</td><td>can take on the values "Parola" (talk) or "Notizia" (news)</td>
</tr>
</table>

In [5]:
# Distinct broadcast channels present in the dataset.
data["channel"].unique()

array(['CANALE 5', 'CIELO', 'ITALIA 1', 'LA7', 'LA7D',
       'Mediaset TgCom 24', 'NOVE', 'RAI 1', 'RAI 2', 'RAI 3', 'RAINEWS',
       'RETE 4', 'SKY TG 24 CAN. 50', 'SKY TG24', 'TV8', 'RTL102.5',
       'Rai Radio1', 'Rai Radio2', 'Rai Radio3'], dtype=object)

In [6]:
# Distinct program titles present in the dataset.
data["program"].unique()

array(['TG5', 'TG5 PRIMA PAGINA', 'MATTINO CINQUE - NEWS', ...,
       'MESSAGGIO DI FINE ANNO DEL PRESIDENTE DELLA REPUBBLICA SERGIO MATTARELLA',
       'CLIP: FATTI ED EVENTI INTERNAZIONALI CHE HANNO CARATTERIZZATO IL 2020',
       'CLIP: MESSAGGIO DI FINE ANNO DEL PRESIDENTE DELLA REPUBBLICA SERGIO MATTARELLA'],
      dtype=object)

In [7]:
# Peek at the first three values of the date column.
data["day"].head(3)

2   2023-01-01
3   2023-01-01
4   2023-01-01
Name: day, dtype: datetime64[ns]

In [8]:
# Distinct values of the `name` column; only the first five are printed.
name_unique = data["name"].unique()
print(name_unique[:5])

['political movement' 'Silvio' 'Giuseppe' 'Enrico' 'Matteo']


In [9]:
# Distinct values of the `lastname` column (surnames and movement names).
data["lastname"].unique()

array(['Azione-Italia Viva-Renew Europe', 'Berlusconi', 'Conte', ...,
       'Delli Colli', 'Calearo', 'Gramazio'], dtype=object)

In [10]:
# For rows flagged as a political movement, `lastname` holds the movement's name.
data.loc[data["name"] == "political movement", "lastname"].head(3)

2     Azione-Italia Viva-Renew Europe
8     Azione-Italia Viva-Renew Europe
18    Azione-Italia Viva-Renew Europe
Name: lastname, dtype: object

In [11]:
# For every other row, `lastname` holds a person's surname.
data.loc[data["name"] != "political movement", "lastname"].head(3)

3    Berlusconi
4         Conte
5         Letta
Name: lastname, dtype: object

In [12]:
# Distinct discussion topics present in the dataset.
data["topic"].unique()

array(['Politica e attività istituzionali',
       'Religione e questioni religiose', 'Medicina, salute e scienza',
       'Esteri', 'Economia, finanza e lavoro', 'Costume e società',
       'Sport', 'Società', 'Cronaca', 'Cultura e istruzione', 'Ambiente',
       'Giustizia', 'Altro', 'Mass media e spettacolo', 'Programma',
       'Fine', 'Informazioni di servizio', 'Pubblicità'], dtype=object)

In [13]:
# Distinct affiliations; only the first five are printed.
unique_affiliations = data.loc[:, "affiliation"].unique()
print(unique_affiliations[:5])

['Azione-Italia Viva-Renew Europe' 'PDL - Forza Italia'
 'Movimento 5 Stelle' 'Partito Democratico' 'Lega Nord']


In [14]:
# Distinct intervention kinds present in the dataset.
data["kind"].unique()

array(['Notizia', 'Parola'], dtype=object)

In [15]:
# Peek at the first five durations (head() defaults to five rows).
data["duration"].head()

2     7
3    42
4    24
5    25
6    17
Name: duration, dtype: int64

In [16]:
# Add a `fullname` column: first name, a single space, then surname.
# Movement rows therefore read "political movement <movement name>".
data["fullname"] = data["name"].add(" ").add(data["lastname"])
data

Unnamed: 0,channel,program,day,lastname,name,affiliation,topic,duration,kind,fullname
2,CANALE 5,TG5,2023-01-01,Azione-Italia Viva-Renew Europe,political movement,Azione-Italia Viva-Renew Europe,Politica e attività istituzionali,7,Notizia,political movement Azione-Italia Viva-Renew Eu...
3,CANALE 5,TG5,2023-01-01,Berlusconi,Silvio,PDL - Forza Italia,Politica e attività istituzionali,42,Notizia,Silvio Berlusconi
4,CANALE 5,TG5,2023-01-01,Conte,Giuseppe,Movimento 5 Stelle,Politica e attività istituzionali,24,Notizia,Giuseppe Conte
5,CANALE 5,TG5,2023-01-01,Letta,Enrico,Partito Democratico,Politica e attività istituzionali,25,Notizia,Enrico Letta
6,CANALE 5,TG5,2023-01-01,Renzi,Matteo,Azione-Italia Viva-Renew Europe,Politica e attività istituzionali,17,Notizia,Matteo Renzi
...,...,...,...,...,...,...,...,...,...,...
30508,TV8,TG8,2021-01-30,Fontana,Attilio,Lega Nord,"Medicina, salute e scienza",11,Parola,Attilio Fontana
30509,TV8,TG8,2021-01-30,Mattarella,Sergio,Presidente della Repubblica,Politica e attività istituzionali,41,Parola,Sergio Mattarella
30510,TV8,TG8,2021-01-31,Movimento 5 Stelle,political movement,Movimento 5 Stelle,Politica e attività istituzionali,3,Notizia,political movement Movimento 5 Stelle
30511,TV8,TG8,2021-01-31,Partito Democratico,political movement,Partito Democratico,Politica e attività istituzionali,4,Notizia,political movement Partito Democratico


In [None]:
# Split the entities into two name arrays:
#   political_groups -> names of the political movements (taken from `lastname`)
#   politicians      -> full names of the individual people

is_movement = data["name"] == "political movement"

political_groups = data.loc[is_movement, "lastname"].unique()
politicians = data.loc[~is_movement, "fullname"].unique()

In [None]:
# Collect the distinct values of the categorical columns, one array per column.

channels, programs, affiliations, topics = (
    data[column].unique()
    for column in ("channel", "program", "affiliation", "topic")
)

In [None]:
#how much a politician talked about all the possible topics (politicianTopics --> json variable {"topic", "minutes", "interventions"})

n = "Silvio Berlusconi"

# Rows attributed to the selected politician.
politician_rows = data[data['fullname'] == n]
politicianTopics = []

for t in topics:
  topic_rows = politician_rows[politician_rows['topic'] == t]
  # int() turns the numpy integer returned by .sum() into a plain int so the
  # record can be handed to json.dumps; the total is not stored in a variable
  # called `sum`, which would shadow the builtin for every later cell.
  politicianTopics.append({
    "topic": t,
    "minutes": int(topic_rows["duration"].sum()),
    "interventions": len(topic_rows),
  })


#how much a political group talked about all the possible topics (polGroupTopics --> json variable {"topic", "minutes", "interventions"})

m = "Azione-Italia Viva-Renew Europe"

# Movement rows keep the movement's name in `lastname`, so filter on m (the
# group). Filtering on n (the politician's full name) matches no rows and
# silently yields zero for every topic.
group_rows = data[data['lastname'] == m]
polGroupTopics = []

for t in topics:
  topic_rows = group_rows[group_rows['topic'] == t]
  polGroupTopics.append({
    "topic": t,
    "minutes": int(topic_rows["duration"].sum()),
    "interventions": len(topic_rows),
  })

In [None]:
#how much a politician talked in a specific channel (politicianChannels --> json variable {"channel", "minutes", "interventions"})

n = "Silvio Berlusconi"

# Rows attributed to the selected politician.
politician_rows = data[data['fullname'] == n]
politicianChannels = []

for c in channels:
  channel_rows = politician_rows[politician_rows['channel'] == c]
  # int() turns the numpy integer returned by .sum() into a plain int so the
  # record can be handed to json.dumps; the total is not stored in a variable
  # called `sum`, which would shadow the builtin for every later cell.
  politicianChannels.append({
    "channel": c,
    "minutes": int(channel_rows["duration"].sum()),
    "interventions": len(channel_rows),
  })


#how much a political group talked in a specific channel (polGroupChannels --> json variable {"channel", "minutes", "interventions"})

m = "Azione-Italia Viva-Renew Europe"

# Movement rows keep the movement's name in `lastname`, so filter on m (the
# group). Filtering on n (the politician's full name) matches no rows and
# silently yields zero for every channel.
group_rows = data[data['lastname'] == m]
polGroupChannels = []

for c in channels:
  channel_rows = group_rows[group_rows['channel'] == c]
  # The key is "channel", matching politicianChannels and the schema stated above.
  polGroupChannels.append({
    "channel": c,
    "minutes": int(channel_rows["duration"].sum()),
    "interventions": len(channel_rows),
  })

In [None]:
#which politicians participate (have participated) in every political group (politiciansInAffiliation --> json variable {"affiliation", "politicians"[] })

# Rows whose `name` is "political movement" describe a movement, not a person;
# drop them once up front instead of inside every loop iteration.
people_rows = data[data['name'] != "political movement"]

politiciansInAffiliation = []

for a in affiliations:
  members = people_rows.loc[people_rows['affiliation'] == a, 'fullname'].unique()
  # .tolist() converts the numpy array into a plain list so the record can be
  # handed to json.dumps.
  politiciansInAffiliation.append({"affiliation": a, "politicians": members.tolist()})

In [None]:
#how much a political group has intervened in tv (interventionsPoliticalGroup --> json variable {"affiliation", "interventions", "minutes"})

interventionsPoliticalGroup = []

for a in affiliations:
  group_rows = data[data['affiliation'] == a]
  # Named total_minutes rather than `sum` so the builtin sum() stays usable in
  # later cells; int() makes the numpy integer acceptable to json.dumps.
  total_minutes = int(group_rows["duration"].sum())
  interventionsPoliticalGroup.append({"affiliation": a, "interventions": len(group_rows), "minutes": total_minutes})

In [None]:
# Affiliations under which a given politician appears in the data (politicianPoliticalGroups[])

n = "Matteo Renzi"

politicianPoliticalGroups = data.loc[data["fullname"] == n, "affiliation"].unique()