In [None]:
!pip install rdflib pandas

Collecting rdflib
  Downloading rdflib-7.1.4-py3-none-any.whl.metadata (11 kB)
Downloading rdflib-7.1.4-py3-none-any.whl (565 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/565.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m563.2/565.1 kB[0m [31m19.3 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m565.1/565.1 kB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rdflib
Successfully installed rdflib-7.1.4


In [None]:
from google.colab import drive
import pandas as pd
from collections import Counter
from rdflib import Graph, Literal, RDF, URIRef, Namespace
from rdflib.namespace import FOAF, XSD

In [None]:
# Expose Google Drive inside the Colab filesystem before touching the data files.
drive.mount('/content/drive')


def _load_tsv(path):
    """Read one tab-separated file into a DataFrame."""
    return pd.read_csv(path, sep="\t")


# One DataFrame per source table; the graph-building cell reads all four.
features_df = _load_tsv("/content/drive/MyDrive/STRUCT/FEATURES.tsv")
speaker_df = _load_tsv("/content/drive/MyDrive/STRUCT/SPEAKER.tsv")
channel_df = _load_tsv("/content/drive/MyDrive/STRUCT/CHANNEL.tsv")
stream_df = _load_tsv("/content/drive/MyDrive/STRUCT/STREAM.tsv")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Build the RDF graph from the speaker, channel, stream and feature tables.
# Every table row becomes one resource in the EX namespace; its columns become
# either typed literals or links to other resources.
g = Graph()
EX = Namespace("http://example.org/stream/")
g.bind("ex", EX)
g.bind("foaf", FOAF)


def _resource(identifier):
    """Return the EX URI for a table identifier, or None if the cell is missing.

    The identifier is converted to ``str`` before the namespace lookup because
    rdflib's Namespace is only meant to be indexed with strings; a NaN or a
    numeric id would otherwise not end up in the resulting URI.
    """
    if pd.isna(identifier):
        return None
    return EX[str(identifier)]


def _add_literal(graph, subject, predicate, value, datatype=XSD.string):
    """Add ``value`` as a typed literal on ``subject``; skip missing cells.

    Without the guard an empty cell (NaN) would be stored as the text "nan".
    """
    if pd.isna(value):
        return
    graph.add((subject, predicate, Literal(value, datatype=datatype)))


def _add_link(graph, subject, predicate, identifier):
    """Link ``subject`` to the resource named by ``identifier``, if present."""
    target = _resource(identifier)
    if target is not None:
        graph.add((subject, predicate, target))


# Speakers: nickname plus the channel they own.
for _, row in speaker_df.iterrows():
    speaker_uri = _resource(row["speaker_id"])
    if speaker_uri is None:
        continue  # a row without its own id cannot be addressed in the graph
    g.add((speaker_uri, RDF.type, EX.Speaker))
    _add_literal(g, speaker_uri, FOAF.nick, row["nickname"])
    _add_link(g, speaker_uri, EX.hasChannel, row["channel_id"])

# Channels: display name and link (the link is typed as xsd:anyURI).
for _, row in channel_df.iterrows():
    channel_uri = _resource(row["channel_id"])
    if channel_uri is None:
        continue
    g.add((channel_uri, RDF.type, EX.Channel))
    _add_literal(g, channel_uri, FOAF.name, row["channel_name"])
    _add_literal(g, channel_uri, EX.link, row["channel_link"], datatype=XSD.anyURI)

# Streams: name, audio path, category and the channel they belong to.
for _, row in stream_df.iterrows():
    stream_uri = _resource(row["stream_id"])
    if stream_uri is None:
        continue
    g.add((stream_uri, RDF.type, EX.Stream))
    _add_literal(g, stream_uri, FOAF.name, row["stream_name"])
    _add_literal(g, stream_uri, EX.fullAudio, row["full_audio_path"])
    _add_literal(g, stream_uri, EX.category, row["category"])
    _add_link(g, stream_uri, EX.belongsToChannel, row["channel_id"])

# Utterances: text, emotion label, valence/arousal/dominance values, and links
# to the speaker and the stream. All literals keep the xsd:string datatype.
for _, row in features_df.iterrows():
    text_uri = _resource(row["text_id"])
    if text_uri is None:
        continue
    g.add((text_uri, RDF.type, EX.Utterance))
    _add_literal(g, text_uri, EX.text, row["text"])
    _add_literal(g, text_uri, EX.hasEmotion, row["emotion"])
    _add_literal(g, text_uri, EX.valence, row["valence"])
    _add_literal(g, text_uri, EX.arousal, row["arousal"])
    _add_literal(g, text_uri, EX.dominance, row["dominance"])
    _add_link(g, text_uri, EX.speaker, row["speaker_id"])
    _add_link(g, text_uri, EX.stream, row["stream_id"])

print(f"Количество триплетов в графе: {len(g)}")

Количество триплетов в графе: 133


In [None]:
# Collect (utterance id, utterance text) for every utterance labelled "anger".
# The emotion label is compared case-insensitively; the id is the last path
# segment of the utterance URI.
anger_utterances = [
    (str(utterance).split("/")[-1], str(g.value(utterance, EX.text)))
    for utterance, _, label in g.triples((None, EX.hasEmotion, None))
    if str(label).lower() == "anger"
]

print("------------------------")
print("Эмоция 'anger':")
print(anger_utterances)
print("------------------------")

------------------------
Эмоция 'anger':
[('TEXT_04', 'Я вообще буду удивлен если у нас хотя бы 20-30 человек доедет'), ('TEXT_06', '6 часов лететь я вот предлставляю как это будет выглядеть. я как представляю как я горбатый за ним хожу 6 часов')]
------------------------


In [None]:
# Collect (stream id, stream name) for every stream whose category is "sports"
# (case-insensitive). The id is the last path segment of the stream URI.
sports_streams = []
for stream_node, _, category in g.triples((None, EX.category, None)):
    if str(category).lower() != "sports":
        continue
    stream_name = g.value(stream_node, FOAF.name)
    stream_key = str(stream_node).split("/")[-1]
    sports_streams.append((stream_key, str(stream_name)))

print("------------------------")
print("Категория 'sports':")
print(sports_streams)

------------------------
Категория 'sports':
[('STREAM_03', '24 часовая гонка. Nordschleife 24h'), ('STREAM_04', 'Зарубы в Лемане перед крупной обновой')]


In [None]:
# Resolve which channel an utterance belongs to by walking
# utterance -> speaker -> channel in the graph.
text_uri = EX["TEXT_04"]
speaker = g.value(text_uri, EX.speaker)
# Only follow the second hop when the utterance actually has a speaker.
channel = g.value(speaker, EX.hasChannel) if speaker is not None else None
text_id = str(text_uri).split("/")[-1]
channel_id = str(channel).split("/")[-1] if channel else "Нет канала"

print("------------------------")
# The header is derived from the URI so it cannot drift from the id queried above.
print(f"{text_id} связан с каналом:")
print(channel_id)
print("------------------------")

------------------------
TEXT_04 связан с каналом:
CHANNEL_03
------------------------


In [None]:
# Tally how many utterances carry each emotion label (labels are lower-cased
# so that differently-cased spellings are counted together).
emotion_counts = Counter(
    str(label).lower()
    for _, _, label in g.triples((None, EX.hasEmotion, None))
)

print("------------------------")
print("Распределение эмоций:")
print(dict(emotion_counts))
print("------------------------")

------------------------
Распределение эмоций:
{'neutral': 3, 'enjoyment': 3, 'anger': 2, 'sadness': 1}
------------------------
