# Download images

## Imports

In [66]:
%pip install google-search-results
%pip install ftfy regex tqdm
%pip install git+https://github.com/openai/CLIP.git
%pip install tree

import os, urllib.request, json
from serpapi import GoogleSearch
from urllib.error import HTTPError
import pandas as pd
import numpy as np
import torch
import clip
import os
import skimage
import IPython.display
import matplotlib.pyplot as plt
from PIL import Image
from collections import OrderedDict

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# Show full cell contents (headlines are long). The full option key is used
# because abbreviated names only work through pattern matching and can break
# if another option containing "colwidth" is ever added.
pd.set_option("display.max_colwidth", None)


## Functions to download images

In [69]:
def check_folders(path):
    """Make sure the directory ``path`` exists, creating missing parents.

    Args:
        path: Directory path to create if it is not there yet.

    Raises:
        OSError: If the directory cannot be created (for example, ``path``
            already exists as a regular file).
    """
    # exist_ok=True makes the call idempotent and removes the race between
    # checking for the directory and creating it.
    os.makedirs(path, exist_ok=True)

In [70]:
def get_config(index, lang, titles, num_images):
    """Build the configuration dict for one news item in one language.

    Args:
        index: Row label of the news item in ``titles``.
        lang: Name of the ``titles`` column holding the headline in the
            wanted language; it is also stored as the language tag.
        titles: DataFrame with ``label`` and ``filename`` columns plus one
            headline column per language.
        num_images: Number of images wanted; stored unchanged.

    Returns:
        dict with the keys ``"lang"``, ``"query"``, ``"label"``
        (``'fake'`` or ``'legit'``), ``"filename"`` (file name without its
        extension) and ``"num_images"``.
    """
    # A truthy label (1) marks a fake item, a falsy one (0) a legit item.
    label = 'fake' if titles.at[index, 'label'] else 'legit'
    # Drop the real extension instead of assuming it is exactly 4 characters.
    stem, _extension = os.path.splitext(titles.at[index, 'filename'])
    return {
        "lang": lang,
        "query": titles.at[index, lang],
        "label": label,
        "filename": stem,
        "num_images": num_images,
    }

In [110]:
def get_google_images(config, serpapi_key, verbosity=0, timeout=30):
    """Search Google Images through SerpApi and download the first hits.

    Images are written to
    ``./images/<label>/<filename>/<lang>/<filename>_<lang>_img<k>.jpg`` where
    ``k`` counts successful downloads, so the saved files are numbered
    without gaps. A result that cannot be downloaded is skipped.

    Args:
        config: dict as produced by ``get_config`` (keys ``query``, ``label``,
            ``filename``, ``lang``, ``num_images``).
        serpapi_key: SerpApi API key.
        verbosity: 0 is silent, >0 reports when the requested number of images
            has been downloaded, >1 also reports every attempt and every
            skipped image.
        timeout: Seconds to wait on each image request before giving up on
            that image.
    """
    params = {
        "api_key": serpapi_key,
        "engine": "google",
        "q": config["query"],
        "tbm": "isch",
    }
    path = f"./images/{config['label']}/{config['filename']}/{config['lang']}/"
    check_folders(path)

    results = GoogleSearch(params).get_dict()

    # Build the opener once and use it directly: the browser-like User-Agent
    # is needed per request, but there is no reason to rebuild it for every
    # image or to install it as the process-wide default opener.
    opener = urllib.request.build_opener()
    opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582')]

    k = 0
    for image in results.get('images_results', []):
        if k >= config["num_images"]:
            break
        if verbosity > 1:
            print(f"Downloading {k} image for {config['filename']} news in {config['lang']} language.")
        target = f"{path}{config['filename']}_{config['lang']}_img{k}.jpg"
        try:
            # Read the whole payload before touching the disk so a failed
            # transfer never leaves a truncated file behind.
            with opener.open(image['original'], timeout=timeout) as response:
                payload = response.read()
            with open(target, "wb") as out_file:
                out_file.write(payload)
            k += 1
        except Exception as exc:
            # Best effort: any failure (missing 'original' key, HTTP error,
            # timeout, ...) only skips this one result.
            if verbosity > 1:
                print(f"Image skipped due to error: {exc}")

    # Reported after the loop so the message also appears when the result
    # list ends exactly at the requested count.
    if verbosity > 0 and k == config["num_images"]:
        print(f"Downloaded images for {config['filename']} news in {config['lang']} language.")

In [72]:
def download_one_news(index, titles, languages, num_images, serpapi_key, verbosity=0):
    """Download images for one news item in every requested language.

    Args:
        index: Row label of the news item in ``titles``.
        titles: DataFrame passed on to ``get_config``.
        languages: Iterable of language column names to query.
        num_images: Number of images to download per language.
        serpapi_key: SerpApi API key passed on to ``get_google_images``.
        verbosity: Verbosity level passed on to ``get_google_images``.
    """
    config = None
    for lang in languages:
        config = get_config(index, lang, titles, num_images)
        get_google_images(config, serpapi_key, verbosity=verbosity)

    # With no languages there is no config to report on; say so instead of
    # failing on an unbound variable.
    if config is None:
        print(f"No languages requested for {index} news, nothing downloaded.")
        return
    print(f"Downloaded all images for {config['filename']} news.")

## Load the titles

In [67]:
# Load the headline table. As the preview shows, it holds the source file name,
# the English headline, its fr/de/es/ru translations and an is_fake flag.
titles = pd.read_csv("./125fake_125legit.csv")
titles.head()

Unnamed: 0,file,headline,fr,de,es,ru,is_fake
0,007fake.txt,Jennifer Aniston on the Exact Moment She Had It With the Pregnancy Rumors,Jennifer Aniston au moment exact où elle a eu avec les rumeurs de grossesse.,"Jennifer Aniston in dem Moment, in dem sie es mit den Schwangerschaftsgerüchten hatte.",Jennifer Aniston en el momento exacto que lo tuvo con los rumores de embarazo.,"Дженнифер Энистон в тот момент, когда у нее есть слухи о беременности.",1
1,022fake.txt,Taylor Swift 'files documents to launch streaming service',Taylor Swift «Documents de fichiers pour lancer le service de streaming».,"Taylor Swift 'Dateien dokumentiert, um den Streaming -Dienst zu starten'.",Taylor Swift 'archiva documentos para iniciar el servicio de transmisión'.,Taylor Swift 'Files Documents для запуска потоковой службы'.,1
2,009fake.txt,Miley Cyrus Wedding Rumors: Will Miley Force Liam Hemsworth To Sign A Prenup?,Miley Cyrus Rumeurs de mariage: Miley forcera-t-elle Liam Hemsworth à signer un contrat de contrat?.,"Miley Cyrus Hochzeitsgerüchte: Wird Miley Liam Hemsworth dazu zwingen, ein Prenup zu unterschreiben?.",Rumores de boda de Miley Cyrus: ¿Miley forzará a Liam Hemsworth firmar un prenupc?.,Свадебные слухи Майли Сайрус: Сможет ли Майли Форс Лиам Хемсворт подписать премьер?.,1
3,024fake.txt,Justin Bieber pokes fun at himself after stepping out with an wet mark,Justin Bieber se moque de lui-même après avoir quitté une marque humide.,"Justin Bieber macht sich lustig an sich, nachdem er mit einer nassen Marke ausgetreten ist.",Justin Bieber se burla de sí mismo después de salir con una marca húmeda.,"Джастин Бибер высмеивает себя после того, как вышел с влажной отметки.",1
4,002fake.txt,Brad Pitt Texts Jennifer Aniston Nonstop: Seeks Intense Emotional Support After Angelina Jolie Divorce! | Celeb Dirty Laundry,Brad Pitt envoie des SMS à Jennifer Aniston sans escale: cherche un soutien émotionnel intense après le divorce d'Angelina Jolie! | Laverie sale de célébrité.,Brad Pitt Texte Jennifer Aniston Nonstop: sucht intensive emotionale Unterstützung nach der Scheidung von Angelina Jolie! | Promi Dirty Wäsche.,Brad Pitt texts Jennifer Aniston Nonstop: ¡busca un intenso apoyo emocional después de Angelina Jolie Divorce! | Celeb Dirty Laundry.,Брэд Питт текстов Дженнифер Энистон Неустановка: ищет интенсивную эмоциональную поддержку после развода Анджелины Джоли! | Знаменитость грязное прачечная.,1


In [73]:
# Rename columns to the names get_config looks up: 'filename', 'label', and
# 'en' so the English headline is addressed by language code like the others.
titles = titles.rename(columns={'headline': 'en', 'file': 'filename', 'is_fake': 'label'})
titles.head()

Unnamed: 0,filename,en,fr,de,es,ru,label
0,007fake.txt,Jennifer Aniston on the Exact Moment She Had It With the Pregnancy Rumors,Jennifer Aniston au moment exact où elle a eu avec les rumeurs de grossesse.,"Jennifer Aniston in dem Moment, in dem sie es mit den Schwangerschaftsgerüchten hatte.",Jennifer Aniston en el momento exacto que lo tuvo con los rumores de embarazo.,"Дженнифер Энистон в тот момент, когда у нее есть слухи о беременности.",1
1,022fake.txt,Taylor Swift 'files documents to launch streaming service',Taylor Swift «Documents de fichiers pour lancer le service de streaming».,"Taylor Swift 'Dateien dokumentiert, um den Streaming -Dienst zu starten'.",Taylor Swift 'archiva documentos para iniciar el servicio de transmisión'.,Taylor Swift 'Files Documents для запуска потоковой службы'.,1
2,009fake.txt,Miley Cyrus Wedding Rumors: Will Miley Force Liam Hemsworth To Sign A Prenup?,Miley Cyrus Rumeurs de mariage: Miley forcera-t-elle Liam Hemsworth à signer un contrat de contrat?.,"Miley Cyrus Hochzeitsgerüchte: Wird Miley Liam Hemsworth dazu zwingen, ein Prenup zu unterschreiben?.",Rumores de boda de Miley Cyrus: ¿Miley forzará a Liam Hemsworth firmar un prenupc?.,Свадебные слухи Майли Сайрус: Сможет ли Майли Форс Лиам Хемсворт подписать премьер?.,1
3,024fake.txt,Justin Bieber pokes fun at himself after stepping out with an wet mark,Justin Bieber se moque de lui-même après avoir quitté une marque humide.,"Justin Bieber macht sich lustig an sich, nachdem er mit einer nassen Marke ausgetreten ist.",Justin Bieber se burla de sí mismo después de salir con una marca húmeda.,"Джастин Бибер высмеивает себя после того, как вышел с влажной отметки.",1
4,002fake.txt,Brad Pitt Texts Jennifer Aniston Nonstop: Seeks Intense Emotional Support After Angelina Jolie Divorce! | Celeb Dirty Laundry,Brad Pitt envoie des SMS à Jennifer Aniston sans escale: cherche un soutien émotionnel intense après le divorce d'Angelina Jolie! | Laverie sale de célébrité.,Brad Pitt Texte Jennifer Aniston Nonstop: sucht intensive emotionale Unterstützung nach der Scheidung von Angelina Jolie! | Promi Dirty Wäsche.,Brad Pitt texts Jennifer Aniston Nonstop: ¡busca un intenso apoyo emocional después de Angelina Jolie Divorce! | Celeb Dirty Laundry.,Брэд Питт текстов Дженнифер Энистон Неустановка: ищет интенсивную эмоциональную поддержку после развода Анджелины Джоли! | Знаменитость грязное прачечная.,1


## Downloading images

In [None]:
# Take the SerpApi key from the environment so the real secret is never saved
# in the notebook; the placeholder is used only when the variable is unset.
serpapi_key = os.environ.get('SERPAPI_API_KEY', 'your_key')
languages = ['de', 'fr', 'ru', 'en', 'es']
num_images = 10

# Iterate over the frame's own index labels: the helpers look rows up by
# label, so this stays correct even if the index is not 0..n-1.
for index in titles.index:
    print(f'Downloading images for {index} news...')
    download_one_news(index, titles, languages, num_images, serpapi_key, verbosity=2)

# Make new features with CLIP

## Load images from folders

In [144]:
def load_images(index, titles, languages=None):
    """Load the downloaded images of one news item for each language.

    Files are looked up as
    ``./images/<label>/<filename>/<lang>/<filename>_<lang>_img<i>.jpg`` for
    ``i`` from 0 up to the number of entries in the language folder. A missing
    folder, a missing file or a file that cannot be opened as an image
    contributes nothing.

    Relies on the module-level ``preprocess`` callable, which must be defined
    before this function is called.

    Args:
        index: Row label of the news item in ``titles``.
        titles: DataFrame passed on to ``get_config``.
        languages: Language codes to load; defaults to
            ``['de', 'es', 'ru', 'en', 'fr']``.

    Returns:
        Tuple ``(original_images, images)`` of dicts keyed by language:
        the RGB-converted PIL images and the results of ``preprocess`` on them.
    """
    if languages is None:
        languages = ['de', 'es', 'ru', 'en', 'fr']

    original_images = {}
    images = {}

    for lang in languages:
        config = get_config(index, lang, titles, 0)
        original_images[lang] = []
        images[lang] = []
        lang_dir = f"./images/{config['label']}/{config['filename']}/{config['lang']}/"

        try:
            n_images_exist = len(os.listdir(lang_dir))
        except FileNotFoundError:
            # Nothing was downloaded for this language: leave the lists empty.
            continue

        for i in range(n_images_exist):
            filename = lang_dir + f"{config['filename']}_{config['lang']}_img{i}.jpg"
            try:
                with Image.open(filename) as raw_image:
                    image = raw_image.convert("RGB")
            except OSError:
                # Covers a missing file as well as a download that is not a
                # readable image; either way the file is skipped.
                continue
            original_images[lang].append(image)
            images[lang].append(preprocess(image))

    return original_images, images

## Calculate cosine similarities between images

In [187]:
def cos_sims_for_one_news(index, titles, languages, language_pairs, model):
    """Compute the mean image cosine similarity between language pairs.

    The images of each language are encoded with ``model.encode_image`` and
    L2-normalised. For a pair the score is the mean over all pairwise dot
    products between the two languages' image features.

    Args:
        index: Row label of the news item in ``titles``.
        titles: DataFrame passed on to ``load_images``.
        languages: Language codes whose images are loaded.
        language_pairs: Iterable of ``(lang_a, lang_b)`` tuples to score; both
            codes must be in ``languages``.
        model: Torch module exposing ``encode_image``.

    Returns:
        dict mapping each pair to its score; the score is 0.0 when either
        language of the pair has no images.
    """
    _, images = load_images(index, titles, languages=languages)
    # Run on whatever device the model lives on instead of assuming CUDA.
    device = next(model.parameters()).device

    def sim_score(pair, image_features):
        first = image_features[pair[0]]
        second = image_features[pair[1]]
        # No images for one side: report 0.0 without depending on the
        # embedding size of the model.
        if first is None or second is None:
            return 0.0
        return (first.cpu().numpy() @ second.cpu().numpy().T).mean()

    with torch.no_grad():
        image_features = {}
        for lang, lang_images in images.items():
            if len(lang_images) == 0:
                image_features[lang] = None
                continue
            batch = torch.tensor(np.stack(lang_images)).to(device)
            features = model.encode_image(batch).float()
            # Unit-length rows turn the dot product into cosine similarity.
            image_features[lang] = features / features.norm(dim=-1, keepdim=True)
        similarities = {pair: sim_score(pair, image_features) for pair in language_pairs}
    return similarities

## Initialize the model and calculate similarities

In [None]:
# #!g1.1
# Let clip.load place the model on the GPU when one is available and fall back
# to the CPU otherwise, instead of calling .cuda() unconditionally.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)
model.eval()
language_pairs = [('en', 'de'), ('en', 'es'), ('en', 'fr'), ('en', 'ru')]
for pair in language_pairs:
    # Start as float so the similarity scores written later match the
    # column dtype.
    titles[f'{pair[0]}-{pair[1]}'] = 0.0

In [None]:
#!g1.1

# Iterate over the frame's own index labels: rows are read and written by
# label (.at / .loc), so this stays correct even if the index is not 0..n-1.
for index in titles.index:
    print(f'Calculating similarities for {index} news...')
    similarities = cos_sims_for_one_news(index, titles, languages=languages, language_pairs=language_pairs, model=model)
    for pair, score in similarities.items():
        titles.loc[index, f'{pair[0]}-{pair[1]}'] = score


In [189]:
# Preview the first rows together with the newly filled similarity columns.
titles.head(30)

Unnamed: 0,filename,en,fr,de,es,ru,label,en-de,en-es,en-fr,en-ru
0,007fake.txt,Jennifer Aniston on the Exact Moment She Had I...,Jennifer Aniston au moment exact où elle a eu ...,"Jennifer Aniston in dem Moment, in dem sie es ...",Jennifer Aniston en el momento exacto que lo t...,"Дженнифер Энистон в тот момент, когда у нее ес...",1,0.0,0.0,0.0,0.0
1,022fake.txt,Taylor Swift 'files documents to launch stream...,Taylor Swift «Documents de fichiers pour lance...,"Taylor Swift 'Dateien dokumentiert, um den Str...",Taylor Swift 'archiva documentos para iniciar ...,Taylor Swift 'Files Documents для запуска пото...,1,0.0,0.0,0.0,0.0
2,009fake.txt,Miley Cyrus Wedding Rumors: Will Miley Force L...,Miley Cyrus Rumeurs de mariage: Miley forcera-...,Miley Cyrus Hochzeitsgerüchte: Wird Miley Liam...,Rumores de boda de Miley Cyrus: ¿Miley forzará...,Свадебные слухи Майли Сайрус: Сможет ли Майли ...,1,0.0,0.0,0.0,0.0
3,024fake.txt,Justin Bieber pokes fun at himself after stepp...,Justin Bieber se moque de lui-même après avoir...,"Justin Bieber macht sich lustig an sich, nachd...",Justin Bieber se burla de sí mismo después de ...,"Джастин Бибер высмеивает себя после того, как ...",1,0.0,0.603129,0.593135,0.628361
4,002fake.txt,Brad Pitt Texts Jennifer Aniston Nonstop: Seek...,Brad Pitt envoie des SMS à Jennifer Aniston sa...,Brad Pitt Texte Jennifer Aniston Nonstop: such...,Brad Pitt texts Jennifer Aniston Nonstop: ¡bus...,Брэд Питт текстов Дженнифер Энистон Неустановк...,1,0.676341,0.598541,0.665803,0.679312
5,014fake.txt,Kanye West -- 'Nervous Breakdown',KANYE WEST - «Débarquement nerveux».,Kanye West - 'Nervenzusammenbruch'.,Kanye West - 'Guponado nervioso'.,Канье Уэст - «нервный срыв».,1,0.76592,0.741485,0.71617,0.755794
6,021fake.txt,Are Taylor Swift and Jake Gyllenhaal Dating Ag...,Taylor Swift et Jake Gyllenhaal se dressent à ...,Sind Taylor Swift und Jake Gyllenhaal wieder z...,¿Taylor Swift y Jake Gyllenhaal salen de nuevo?.,Тейлор Свифт и Джейк Джилленхал снова встречаю...,1,0.701117,0.691496,0.696562,0.659121
7,013fake.txt,Kanye West Wants to Enter Cosmetics Business L...,Kanye West veut entrer dans le secteur des cos...,Kanye West will wie Kylie Kosmetikgeschäft bet...,Kanye West quiere ingresar a un negocio de cos...,"Канье Уэст хочет войти в косметический бизнес,...",1,0.651074,0.742563,0.681806,0.703384
8,010fake.txt,Kristen Stewart Drops 'Twilight' Movie Plans W...,Kristen Stewart laisse tomber les plans de fil...,Kristen Stewart Drops 'Twilight' Filmpläne mit...,Kristen Stewart presenta planes de películas '...,Кристен Стюарт бросает планы фильмов «Сумерки»...,1,0.64807,0.65549,0.668335,0.646924
9,012fake.txt,Kanye West Was Designing Looks For Melania Tru...,Kanye West concevait Looks pour Melania Trump ...,Kanye West suchte nach Melania Trump am Einwei...,Kanye West estaba diseñando Looks para Melania...,Канье Уэст разрабатывал поиски Мелании Трамп в...,1,0.601848,0.625723,0.0,0.0


## Save the CSV file with the new features

In [191]:
# index=False keeps the DataFrame index from being written as an extra column.
titles.to_csv("./titles_with_image_similarities.csv", index = False)