<a href="https://colab.research.google.com/github/frasercrichton/data-investigation-conspiracy-aotearoa/blob/main/analysis/Pattern_of_Life.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Clean and Extract Data


In [85]:
import pandas as pd
import cv2
from deep_translator import GoogleTranslator
videos_df = pd.read_json('../data/source/video-2023-09-14.json', convert_dates=['createTime'])

processed_dir = '../data/processed/'

## Parse the Videos JSON file

- Extract:
  - Comments
  - Likes
- Translate any text

In [12]:
videos_df = videos_df.drop(columns=[
                                    'author',
                                    'challenges',
                                    'collected',
                                    'contents', 
                                    'digged', 
                                    'duetDisplay', 
                                    'forFriend',
                                    'itemCommentStatus', 
                                    'privateItem', 
                                    'secret', 
                                    'shareEnabled', 
                                    'stitchDisplay', 
                                    'officalItem',
                                    'originalItem',
                                    'duetEnabled',
                                    'stitchEnabled',
                                    ])
def translate_text(text):
    translation = GoogleTranslator(source='it', target='en').translate(text)
    return translation  

def parse_desc(desc):
    return translate_text(desc)

def parse_stats(stats):
    return pd.Series([stats['collectCount'], stats['commentCount'], stats['diggCount'], stats['playCount'], stats['shareCount']])

def parse_text_extra(extra_text):
    if extra_text is None:
        return pd.Series([extra_text, extra_text])
    
    extra_text_as_json = pd.json_normalize(extra_text)    
    hashtags = ', '.join(extra_text_as_json['hashtagName'].values)
    return pd.Series([hashtags, translate_text(hashtags)])

def parse_warn_info(warn_info):
    if warn_info is None:
        return warn_info
    
    return warn_info[0]['text']

def parse_video(video):    
    return video['zoomCover']['960']

# desc: translate the video description into English
videos_df['desc_en'] = videos_df.apply(lambda row: parse_desc(row['desc']), axis=1)

# textExtra: extract a list of hashtags
videos_df[['textExtra', 'textExtra_en']] = videos_df.apply(lambda row: parse_text_extra(row['textExtra']), axis=1)

# stats: turn the like, comment, etc. counts into columns
videos_df[['collectCount', 'commentCount', 'diggCount', 'playCount', 'shareCount']] = videos_df.apply(lambda row: parse_stats(row['stats']), axis=1)
videos_df = videos_df.drop(columns=['stats'])

# warnInfo: extract any content warning text 
videos_df['warnInfo'] = videos_df.apply(lambda x: parse_warn_info(x['warnInfo']), axis=1)

# Videos - zoomCover: get the URL of the cover image
videos_df['coverImage'] = videos_df.apply(lambda row: parse_video(row['video']), axis=1)

videos_df


Unnamed: 0,createTime,desc,id,music,video,textExtra,warnInfo,desc_en,textExtra_en,collectCount,commentCount,diggCount,playCount,shareCount,coverImage
0,2023-09-13 18:57:03,"La mia intervista di questa sera a ""Cinque Min...",7278386520275373056,"{'authorName': 'Giorgia Meloni', 'coverLarge':...","{'bitrate': 1178262, 'bitrateInfo': [{'Bitrate...",,,"My interview this evening on ""Cinque Minuti"", ...",,840,2669,14800,435800,438,https://p16-sign-useast2a.tiktokcdn.com/tos-us...
1,2023-09-12 16:05:41,Stiamo dando alla Nazione una strategia che no...,7277971263958666240,"{'authorName': 'Giorgia Meloni', 'coverLarge':...","{'bitrate': 1021716, 'bitrateInfo': [{'Bitrate...",,,We are giving the Nation a strategy that it ha...,,1050,6426,21200,622700,598,https://p16-sign-useast2a.tiktokcdn.com/tos-us...
2,2023-09-11 09:02:24,"Grazie India, complimenti per il successo del ...",7277491085071387648,"{'authorName': 'Giorgia Meloni', 'coverLarge':...","{'bitrate': 1374581, 'bitrateInfo': [{'Bitrate...",g20,,"Thank you India, congratulations on the succes...",g20,313,468,7158,137500,224,https://p16-sign-useast2a.tiktokcdn.com/tos-us...
3,2023-09-03 07:28:10,A 41 anni dal brutale attentato mafioso che ha...,7274498125660703744,"{'authorName': 'Giorgia Meloni', 'coverLarge':...","{'bitrate': 627490, 'bitrateInfo': [{'Bitrate'...",,,41 years after the brutal mafia attack that ca...,,277,274,4874,141400,204,https://p16-sign-useast2a.tiktokcdn.com/tos-us...
4,2023-08-12 06:05:11,Salario minimo: punto stampa dopo l’incontro c...,7266312532665584640,"{'authorName': 'Giorgia Meloni', 'coverLarge':...","{'bitrate': 1077066, 'bitrateInfo': [{'Bitrate...",,,Minimum wage: press point after the meeting wi...,,2847,7144,52600,1800000,1388,https://p16-sign-useast2a.tiktokcdn.com/tos-us...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
219,2022-02-20 17:31:35,Il risultato del #greenpass è stato solo quell...,7066845965105318912,"{'authorName': 'Giorgia Meloni', 'coverLarge':...","{'bitrate': 998202, 'bitrateInfo': [{'Bitrate'...",greenpass,,The result of the #greenpass was only to manag...,greenpass,2460,2020,54800,977100,4925,https://p16-sign-va.tiktokcdn.com/tos-maliva-p...
220,2022-02-18 17:00:34,Noi vogliamo difendere l’Italia 🇮🇹,7066095800291576832,"{'authorName': 'Giorgia Meloni', 'coverLarge':...","{'bitrate': 710139, 'bitrateInfo': [{'Bitrate'...",,,We want to defend Italy 🇮🇹,,30,41,2052,37700,37,https://p16-sign-va.tiktokcdn.com/tos-maliva-p...
221,2022-02-11 14:53:48,Sia chiaro a tutti: noi il governo con il PD e...,7063465536109235200,"{'authorName': 'Giorgia Meloni', 'coverLarge':...","{'bitrate': 401098, 'bitrateInfo': [{'Bitrate'...",,,Let it be clear to everyone: we will never for...,,22,40,1482,33000,21,https://p16-sign-va.tiktokcdn.com/tos-maliva-p...
222,2022-02-10 19:07:13,Cittadini e imprese schiacciati dalla crisi e ...,7063159758185827328,"{'authorName': 'Giorgia Meloni', 'coverLarge':...","{'bitrate': 326414, 'bitrateInfo': [{'Bitrate'...",,,Citizens and businesses crushed by the crisis ...,,28,202,2240,47300,126,https://p16-sign-va.tiktokcdn.com/tos-maliva-p...


Write out the results to a CSV file. 

In [14]:
videos_df.to_json('../data/processed/videos-translated.json')
videos_df.to_csv('../data/processed/videos-translated.csv', sep=',')

videos_df[['createTime', 'textExtra_en','desc_en']].to_csv('../data/processed/hashtags-and-captions-translated.csv', sep=',')

## Download Resources

In [82]:
# def download_file_and_save(url: str, filepath: Path):
#     """Download a file from a specified URL and write its contents to a file"""

#     r = _get(url=url)
#     if r.status_code == 403:
#         return
#     ext = r.headers["Content-Type"].split("/")[-1]
#     path_with_ext = filepath.with_suffix(f".{ext}")
#     with open(path_with_ext, "wb") as f:
#         f.write(r.content)
#         logger.debug(f"Saved file to: {path_with_ext}")

videos_df['coverImage']

# get the video Duration


0      https://p16-sign-useast2a.tiktokcdn.com/tos-us...
1      https://p16-sign-useast2a.tiktokcdn.com/tos-us...
2      https://p16-sign-useast2a.tiktokcdn.com/tos-us...
3      https://p16-sign-useast2a.tiktokcdn.com/tos-us...
4      https://p16-sign-useast2a.tiktokcdn.com/tos-us...
                             ...                        
219    https://p16-sign-va.tiktokcdn.com/tos-maliva-p...
220    https://p16-sign-va.tiktokcdn.com/tos-maliva-p...
221    https://p16-sign-va.tiktokcdn.com/tos-maliva-p...
222    https://p16-sign-va.tiktokcdn.com/tos-maliva-p...
223    https://p16-sign-va.tiktokcdn.com/tos-maliva-p...
Name: coverImage, Length: 224, dtype: object

## Download Videos

In [13]:
import logging
import os
import json
from pathlib import Path
from urllib.error import HTTPError
import yt_dlp
from yt_dlp.utils import ExtractorError, DownloadError
from typing import List, Dict, Optional

logger = logging.getLogger(__name__)

def download_videos(url_to_download=None, output_dir=None):
    if len(url_to_download) > 0:
        print(f"Downloading media {url_to_download}")
        logger.info(f"Downloading media for hashtag {url_to_download}")

    ydl_opts = {
        "outtmpl": os.path.join(output_dir, "%(id)s.%(ext)s"),
        "ignore_errors": True,
        "quiet": logger.getEffectiveLevel() > logging.DEBUG,
    }
    
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        try:
            ydl.download([url_to_download])
        except (HTTPError, TypeError, ExtractorError, DownloadError) as e:
            logger.warning(
                f"Encountered error {e} when attempting to download url: {url_to_download}"
            )

def get_video_list(videos_dir):
    already_downloaded_ids = set(
        file.split(".")[0].split("_")[0] for file in os.listdir(videos_dir)
    )
    video_df = pd.json_normalize(videos_df['video'])
    return video_df[~video_df['id'].isin(already_downloaded_ids)]
 
new_videos = get_video_list(videos_dir='../data/processed/videos')
new_videos
for video_id in new_videos['id']:
    print(video_id)
    user_id = '7057902765381534725'
    url = f"https://www.tiktok.com/@{user_id}/video/{video_id}"
    print(url)
    download_videos(url_to_download=url, output_dir='../data/processed/videos')

## Update the Videos CSV/JSON with video duration

In [115]:
def get_video_duration(filename):

    video = cv2.VideoCapture(filename)

    if video is None or not video.isOpened():
       print('Warning: unable to open video source: ', filename)
       return '###### VIDEO NOT FOUND ###########'

    duration = video.get(cv2.CAP_PROP_POS_MSEC)
    fps = video.get(cv2.CAP_PROP_FPS)      
    frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    duration = frame_count/fps

    minutes = int(duration/60)
    seconds = duration%60
    duration_formatted = str(minutes) + ':' + str(round(seconds))

    video.release()
    return duration_formatted

videos_translated_df = pd.read_json(processed_dir + 'videos-translated.json', convert_dates=['createTime'])

def add_duration(file_id):
    file_name = processed_dir + 'videos/' + str(file_id) + '.mp4'
    duration = get_video_duration(file_name)
    
    print(duration)
    return duration

videos_translated_df['duration'] = videos_translated_df.apply(lambda row: add_duration(row['id']), axis=1)

videos_translated_df.to_json(processed_dir + 'videos-translated-with-duration.json')



###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOUND ###########
###### VIDEO NOT FOU

OpenCV: Couldn't read video stream from file "../data/processed/videos/7278386520275373000.mp4"
[ERROR:0@3832.463] global cap.cpp:166 open VIDEOIO(CV_IMAGES): raised OpenCV exception:

OpenCV(4.8.0) /Users/xperience/GHA-OpenCV-Python/_work/opencv-python/opencv-python/opencv/modules/videoio/src/cap_images.cpp:267: error: (-215:Assertion failed) number < max_number in function 'icvExtractPattern'


OpenCV: Couldn't read video stream from file "../data/processed/videos/7277971263958666000.mp4"
[ERROR:0@3832.463] global cap.cpp:166 open VIDEOIO(CV_IMAGES): raised OpenCV exception:

OpenCV(4.8.0) /Users/xperience/GHA-OpenCV-Python/_work/opencv-python/opencv-python/opencv/modules/videoio/src/cap_images.cpp:267: error: (-215:Assertion failed) number < max_number in function 'icvExtractPattern'


OpenCV: Couldn't read video stream from file "../data/processed/videos/7277491085071388000.mp4"
[ERROR:0@3832.464] global cap.cpp:166 open VIDEOIO(CV_IMAGES): raised OpenCV exception:

OpenCV(4.8.0) /

## Convert any Video Files to audio 

In [15]:
import moviepy
import moviepy.editor
import os
import glob

videos_dir = '../data/processed/videos/'
audio_dir = '../data/processed/audio'

if not os.path.exists(audio_dir):
    os.mkdir(audio_dir)

file_name_list = glob.glob(os.path.join(videos_dir, "*.mp4"))
print(len(file_name_list))

for file_name in file_name_list:
    # print(file_name)
    video = moviepy.editor.VideoFileClip(file_name)
    audio = video.audio
    audio_file_name = file_name.replace('videos','audio').replace('.mp4', '.mp3')
    audio.write_audiofile(audio_file_name)

224
MoviePy - Writing audio in ../data/processed/audio/7144425341279341830.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7187471251794005254.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7145754357013777670.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7158424452286401797.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7277971263958666529.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7092798207511612678.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7244545854739172635.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7206775485903260933.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7233290952285244698.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7164382552751394053.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7228211560240467227.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7128429765639343366.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7182913772271521030.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7137577046552726790.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7138094156332715270.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7137699360111938822.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7136469259756031237.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7222341088596610309.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7183669325126012166.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7145877683740658949.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7147206448232074502.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7156890275317632262.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7066095800291577093.mp3


                                                        

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7140683479053831429.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7145360998407425285.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7204931077029268741.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7161412406495694086.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7109083244611587333.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7143225656137075974.mp3


                                                                     

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7139418655879744774.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7070073900100521222.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7147263913200045317.mp3


                                                        

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7265735771271728416.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7136548859093863685.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7107893116505492741.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7240155848205454619.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7216766115110735109.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7243739642934201626.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7174007223310372101.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7136459590434147590.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7206044310301592837.mp3


                                                                     

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7094669289294220549.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7081255304721075461.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7189709267392318725.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7227372741660282138.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7180307547616742662.mp3


                                                                        

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7145435366529551621.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7067167670214331654.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7143285004917935365.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7237568877436177690.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7198571880997981445.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7211143983282588934.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7135145359629634822.mp3


                                                        

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7247012477228076314.mp3


                                                        

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7182877365171801349.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7137360677756325126.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7161896989863709958.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7092750010739215621.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7078649936128003334.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7178948277834992901.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7092832806765612293.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7140316758455454982.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7136118025811479814.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7241634208106515738.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7188187795985272069.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7092732219587874053.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7143551183792164102.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7092292016104606982.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7160592391194299654.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7170424345486036229.mp3


                                                                     

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7063465536109235461.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7146265492234292486.mp3


                                                                     

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7178163802578455813.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7143251423474502917.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7073752853965262086.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7257581884450917659.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7144295242039037190.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7141788005320150277.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7066845965105319173.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7166251050142420230.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7205257187130674437.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7113843601427139846.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7122815617337494789.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7168946059706764550.mp3


                                                                        

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7176657919654579462.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7177049020412513542.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7232366148258417946.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7137367409882582277.mp3


                                                        

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7221065295304117509.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7223471702020246810.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7157692146278075654.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7133129347828944134.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7215590061742591259.mp3


                                                                     

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7238958663094865178.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7234480028908555547.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7138307731299028230.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7138364518136122629.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7176318412900027654.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7092079134247046405.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7210840206986071301.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7142918656551423237.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7178782554814811397.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7106933742027001093.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7070475423716543749.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7140955700699696390.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7116559793489267973.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7143163842665663749.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7226821746425466139.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7144338594927299846.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7261188519618448667.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7134227016500497670.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7250049940255182107.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7092109732386737413.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7277491085071387937.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7078014348152638726.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7184340534251621637.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7106170331500399878.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7134593713753427205.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7060519835788250373.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7141284558791576837.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7132746085545561350.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7137965836253351173.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7145843103075781893.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7278386520275373344.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7235576250306202907.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7141381807093697798.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7136915130884934918.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7146691344281521414.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7221262230988262661.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7137218493681896710.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7259020896466242842.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7251672977257942299.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7111970905525341445.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7074130814199516422.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7217889363739315462.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7189296202347744517.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7070414977118588165.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7180372649367915782.mp3


                                                                        

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7239724965312941338.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7231465075389795611.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7215644500771818757.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7115419591593037061.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7086078703641332998.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7194909784926063877.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7206202741440449798.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7211503013234527494.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7257145557309738266.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7210879251527077125.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7189243457179880709.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7139623564637900038.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7173606687327161605.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7152865526417558789.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7216786331005930778.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7146642608033434885.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7069423165268626694.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7146526475439639814.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7198105090119568646.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7157249765837786373.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7130543050006269189.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7067893974035401990.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7234091904151932186.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7156960255723719941.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7136844388176530694.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7177010908625980678.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7260133930643361050.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7146670520505847045.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7208626665683995909.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7142930878216359173.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7135363514704727301.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7123181095293701382.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7109740680011894022.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7247501157361372443.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7163995254054489350.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7249053260105944347.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7144420910336953606.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7104226960422800646.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7137738553127079173.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7205624570915736837.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7136424309131578629.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7201936585745632517.mp3


                                                                        

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7175162657949994246.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7207816601217944837.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7132368164540730629.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7262671603471797536.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7137723121796156677.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7138477051790494981.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7132830393153162501.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7144246488879713542.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7142030812693810438.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7128321428012764422.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7265415410848238880.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7146272946183605510.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7141803151136410886.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7144023384853171462.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7144769480244071686.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7243836094456827162.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7137343861130677510.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7260633128325090587.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7104580217892506885.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7219014997781925146.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7063159758185827590.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7261648290305821978.mp3


                                                                     

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7198603797227244806.mp3


                                                                     

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7134693026680655109.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7133867159503097093.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7187877327806762246.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7144986246215929094.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7274498125660704033.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7148069987796765958.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7202261483860806917.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7187514172463254790.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7138461939923029254.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7222372308235603206.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7265621447920422176.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7074535704902077701.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7231809083169328411.mp3


                                                                     

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7254490393851481371.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7080511603812109574.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7162839147302997254.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7142259434000059654.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7238646546152082714.mp3


                                                                    

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7092396552064453893.mp3


                                                                      

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7266312532665584928.mp3


                                                                        

MoviePy - Done.
MoviePy - Writing audio in ../data/processed/audio/7235728376848272666.mp3


                                                                      

MoviePy - Done.




## Extract Transcribed Text

In [79]:
transcription_dir_location = '../data/processed/transcription/'

def extract_translated_text(file_name: str):
    if file_name == '.DS_Store':
        return
    
    print(f'file_name: {file_name}')
    transcriptions_df = pd.read_json(transcription_dir_location + 'original/' + file_name)
    transcripts_df = transcriptions_df['results']['transcripts'] 
    if len(transcripts_df) > 1:
        print('error')
        raise

    transcript_df = pd.DataFrame.from_dict(transcripts_df[0], orient='index')
    print(transcript_df)
    transcript_df.to_json(f'{transcription_dir_location}/{file_name}')    

for file_name in os.listdir(transcription_dir_location + 'original'):
    extract_translated_text(file_name)



file_name: TranscribeTikTokAudio7216786331005930778.json
                                                            0
transcript  Un altro capitolo importante del nostro lavoro...
file_name: TranscribeTikTokAudio7177010908625980678.json
                                                            0
transcript  ricorrono quest'anno i dieci anni dalla fondaz...
file_name: TranscribeTikTokAudio7178163802578455813.json
           0
transcript  
file_name: TranscribeTikTokAudio7194909784926063877.json
                                                            0
transcript  a cento giorni dall'insediamento del nuovo gov...
file_name: TranscribeTikTokAudio7092079134247046405.json
                                         0
transcript  Sì, ma io la Presidente. Ciao.
file_name: TranscribeTikTokAudio7146670520505847045.json
                                                            0
transcript  sarebbe stato facile in questa campagna eletto...
file_name: TranscribeTikTokAudio716894605970676455