<a href="https://colab.research.google.com/github/azernik/semeval_2025_task1/blob/main/shubham_admire_experiments.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# for downloading the train zip from Drive
!pip install -q gdown

# install clip
!pip install -q ftfy regex tqdm
!pip install -q git+https://github.com/openai/CLIP.git

  Preparing metadata (setup.py) ... [?25l[?25hdone


In [None]:
import gdown

# download taskA file from Adam's Drive (public) and unzip
file_id = "105JdQU_u98w_xSYaNNSj-r4RsyTPXZEF"
url = f"https://drive.google.com/uc?id={file_id}"
gdown.download(url, "taskA.zip", quiet=True)
! unzip -q - taskA.zip

In [None]:
import os
import pandas as pd

# Folder created by unzipping the archive, and the TSV inside it
taska_folder = "train"
taska_tsv_filename = "subtask_a_train.tsv"

# Read the tab-separated training table
df = pd.read_csv(taska_folder + "/" + taska_tsv_filename, sep="\t")

**Experiment 1: Similarity between Image Captions and the Sentence**
- Rank the **images** by the similarity between each image's caption and the sentence

In [None]:
import torch
import clip
from PIL import Image
import pandas as pd
from ast import literal_eval
import csv
import nltk
from nltk.corpus import stopwords
from scipy.stats import spearmanr

# Fetch the NLTK stopword corpus and keep the English list as a set for fast lookups
nltk.download('stopwords', quiet=True)
stop_words = set(stopwords.words('english'))

# Use the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# CLIP ViT-B/32 model plus its image preprocessing transform
model, preprocess = clip.load("ViT-B/32", device=device)


def remove_stopwords(text):
    """Drop every whitespace-separated word of ``text`` that is in ``stop_words``.

    The lookup is case-insensitive (each word is lower-cased first); surviving
    words keep their original casing and are re-joined with single spaces.
    """
    kept = []
    for token in text.split():
        if token.lower() in stop_words:
            continue
        kept.append(token)
    return " ".join(kept)

def truncate_text(text, max_length=50):
    """Keep at most ``max_length`` whitespace-separated words of ``text``.

    The limit counts words, not tokenizer tokens. Runs of whitespace collapse
    to single spaces because the text is split and re-joined.
    """
    head = text.split()[:max_length]
    return " ".join(head)

def process_captions(captions, max_length=77):
    """Strip stopwords from every caption, preserving caption order.

    :param captions: Iterable of caption strings, one per candidate image.
    :param max_length: Unused in this variant (no word-level truncation is
        applied); kept so existing callers keep working. Over-long captions are
        clipped later by ``encode_text``, which tokenizes with ``truncate=True``.
    :return: List of stopword-filtered captions, same length and order as the input.
    """
    # Iterate over what was actually passed rather than a hard-coded range(0, 5),
    # which raised IndexError for fewer captions and silently ignored extras.
    return [remove_stopwords(caption) for caption in captions]

def evaluate_predictions(predictions, df, weights=(0.4, 0.3, 0.2, 0.1, 0.0)):
    """
    Score predicted image orderings against ``df["expected_order_indexed"]``.

    :param predictions: Sequence (list or pandas Series) with one predicted
        ordering per row of ``df``, aligned by position. Rows whose prediction
        is empty are skipped.
    :param df: DataFrame with an ``expected_order_indexed`` column holding the
        ground-truth ordering for each row.
    :param weights: Per-position credit used for the weighted accuracy.
    :return: Dict with ``top1_accuracy``, ``average_spearman`` and
        ``average_weighted_accuracy``. All three are averaged over the rows
        that were actually evaluated, and are 0.0 when no row was evaluated.
    """
    # Read both sides by position. The original mixed label-based
    # ``predictions[i]`` with positional ``.iloc[i]``, which misaligns (or
    # raises KeyError) for a Series whose index is not 0..n-1.
    pred_rows = list(predictions)
    truth_rows = list(df["expected_order_indexed"])

    correct_top1 = 0
    spearman_scores, weighted_scores = [], []

    for i in range(len(df)):
        pred_order = pred_rows[i]
        if len(pred_order) == 0:
            continue
        ground_truth_order = truth_rows[i]

        # Top-1 accuracy
        if pred_order[0] == ground_truth_order[0]:
            correct_top1 += 1

        # Spearman correlation
        # NOTE(review): this correlates the two orderings position by position;
        # correlating each image's rank in both orderings can give a different
        # value -- confirm which one the task metric expects.
        score, _ = spearmanr(pred_order, ground_truth_order)
        spearman_scores.append(score)

        # Weighted accuracy: credit weights[j] when position j holds the right image
        weighted_scores.append(
            sum(w for w, pred, truth in zip(weights, pred_order, ground_truth_order) if pred == truth)
        )

    evaluated = len(spearman_scores)
    if evaluated == 0:
        # Nothing to average; avoid ZeroDivisionError
        return {
            "top1_accuracy": 0.0,
            "average_spearman": 0.0,
            "average_weighted_accuracy": 0.0,
        }

    # Same denominator for all three metrics (previously top-1 used len(df))
    return {
        "top1_accuracy": correct_top1 / evaluated,
        "average_spearman": sum(spearman_scores) / evaluated,
        "average_weighted_accuracy": sum(weighted_scores) / evaluated,
    }

def encode_text(text):
    """
    Return the L2-normalized CLIP text embedding for a single string.

    The string is tokenized with ``truncate=True`` and encoded by the
    module-level ``model`` on ``device`` with gradients disabled.
    """
    tokens = clip.tokenize([text], truncate=True)
    tokens = tokens.to(device)
    with torch.no_grad():
        embedding = model.encode_text(tokens)
    # Scale to unit length along the feature axis
    norm = embedding.norm(dim=-1, keepdim=True)
    return embedding / norm

def rank_images_by_caption_similarity(sentence, captions):
    """
    Order images by how similar each caption is to the sentence.

    :param sentence: The input sentence for the row.
    :param captions: List of captions, one per image.
    :return: Tuple ``(ranked_indices, similarities)``: caption indices sorted
        from most to least similar, and the cosine similarity of each caption
        in the original caption order.
    """
    query = encode_text(sentence)

    # Cosine similarity between the sentence and each caption, in input order
    similarities = []
    for caption in captions:
        caption_vec = encode_text(caption)
        similarities.append(torch.cosine_similarity(query, caption_vec, dim=-1).item())

    # Highest similarity first
    by_score = sorted(enumerate(similarities), key=lambda pair: pair[1], reverse=True)
    ranked_indices = [index for index, _ in by_score]
    return ranked_indices, similarities

def save_predictions(df, predictions, similarities, evaluation_metrics,
                     preds_file="predictions_with_similarities.csv",
                     metrics_file="evaluation_metrics.txt"):
    """
    Write per-example predictions and the aggregate metrics to disk.

    :param df: DataFrame providing the ``compound`` and
        ``expected_order_indexed`` columns, read by position.
    :param predictions: One predicted ordering per row, aligned with ``df`` by position.
    :param similarities: One list of similarity scores per row, same alignment.
    :param evaluation_metrics: Mapping of metric name to numeric value.
    :param preds_file: CSV path for the per-example rows (default unchanged).
    :param metrics_file: Text path for the ``name: value`` metric lines (default unchanged).
    """
    compounds = df["compound"]
    ground_truth = df["expected_order_indexed"]

    # One CSV row per example: index, compound, ground truth, prediction, scores
    with open(preds_file, mode="w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["index", "compound", "ground_truth_order", "predicted_order", "similarity_scores"])
        for i, (pred_order, sim) in enumerate(zip(predictions, similarities)):
            writer.writerow([i, compounds.iloc[i], ground_truth.iloc[i], pred_order, list(sim)])

    print(f"Predictions saved to {preds_file}")

    # One "metric: value" line per metric, four decimal places
    with open(metrics_file, mode="w") as f:
        for metric, value in evaluation_metrics.items():
            f.write(f"{metric}: {value:.4f}\n")

    print(f"Evaluation metrics saved to {metrics_file}")




In [None]:
# Experiment 1 driver: rank each row's five images by caption/sentence similarity.
all_rankings = []
all_similarities = []
expected_row_indices = []

for _, row in df.iterrows():
    sentence = row["sentence"]
    captions = [row[f"image{i}_caption"] for i in range(1, 6)]

    # Map each image file name to its 0-based slot (image1 -> 0, ..., image5 -> 4)
    # so the expected order can be expressed in the same index space as the rankings.
    name_to_slot = {row[f"image{i}_name"]: i - 1 for i in range(1, 6)}
    expected_order = [name_to_slot[name] for name in literal_eval(row['expected_order'])]
    expected_row_indices.append(expected_order)

    captions = process_captions(captions)

    # Rank images by similarity to the sentence
    rankings, similarities = rank_images_by_caption_similarity(sentence, captions)
    all_rankings.append(rankings)
    all_similarities.append(similarities)

df["expected_order_indexed"] = expected_row_indices
df["ranked_images"] = all_rankings
df["similarities"] = all_similarities

evaluation_metrics = evaluate_predictions(df['ranked_images'], df)
save_predictions(df, df['ranked_images'], df["similarities"], evaluation_metrics)

# Save results to a CSV (optional)
df.to_csv("ranked_images_based_on_captions.csv", index=False)

print("\nResults for Experiment 1:")
print(f"Top-1 Accuracy: {evaluation_metrics['top1_accuracy'] * 100:.2f}%")
print(f"Average Spearman Correlation: {evaluation_metrics['average_spearman']:.2f}")
print(f"Average Weighted Accuracy: {evaluation_metrics['average_weighted_accuracy']:.2f}")

Predictions saved to predictions_with_similarities.csv
Evaluation metrics saved to evaluation_metrics.txt

Results for Experiment 1:
Top-1 Accuracy: 21.43%
Average Spearman Correlation: -0.04
Average Weighted Accuracy: 0.23


**Experiment 2: Similarity between Summarized Image Captions and the Paraphrased NC**  
- **DO NOT RUN:** the cell below summarizes every caption with Llama-2-7b-chat; it takes extremely long and eventually crashes.

In [None]:
import torch
import clip
from PIL import Image
import pandas as pd
from ast import literal_eval
import csv
import nltk
from nltk.corpus import stopwords
from scipy.stats import spearmanr
from transformers import pipeline

# Download NLTK stopwords
nltk.download('stopwords', quiet=True)
stop_words = set(stopwords.words('english'))

# Pick the device once and use it for both models
device = "cuda" if torch.cuda.is_available() else "cpu"

# The pipeline was previously built without a device, so `device` above was
# ignored for the 7B model. Place it explicitly, and use half precision on GPU.
# NOTE(review): half precision roughly halves the weights' memory footprint;
# confirm the model fits on the target GPU next to CLIP.
summarizer = pipeline(
    "text-generation",
    model="meta-llama/Llama-2-7b-chat-hf",
    device=device,
    torch_dtype=torch.float16 if device == "cuda" else None,
)

# Load CLIP model and preprocessing
model, preprocess = clip.load("ViT-B/32", device=device)


def remove_stopwords(text):
    """Drop every whitespace-separated word of ``text`` that is in ``stop_words``.

    The lookup is case-insensitive (each word is lower-cased first); surviving
    words keep their original casing and are re-joined with single spaces.
    """
    kept = []
    for token in text.split():
        if token.lower() in stop_words:
            continue
        kept.append(token)
    return " ".join(kept)

def truncate_text(text, max_length=50):
    """Keep at most ``max_length`` whitespace-separated words of ``text``.

    The limit counts words, not tokenizer tokens. Runs of whitespace collapse
    to single spaces because the text is split and re-joined.
    """
    head = text.split()[:max_length]
    return " ".join(head)

def process_captions(captions, max_length=77):
    """Strip stopwords from every caption, then cap its length in words.

    :param captions: Iterable of caption strings, one per candidate image.
    :param max_length: Nominal limit; each caption keeps at most
        ``max_length - 20`` whitespace-separated words.
    :return: List of processed captions, same length and order as the input.
    """
    word_budget = max_length - 20  # Buffer for special tokens
    # Iterate over what was actually passed rather than a hard-coded range(0, 5),
    # which raised IndexError for fewer captions and silently ignored extras.
    return [
        truncate_text(remove_stopwords(caption), max_length=word_budget)
        for caption in captions
    ]

def evaluate_predictions(predictions, df, weights=(0.4, 0.3, 0.2, 0.1, 0.0)):
    """
    Score predicted image orderings against ``df["expected_order_indexed"]``.

    :param predictions: Sequence (list or pandas Series) with one predicted
        ordering per row of ``df``, aligned by position. Rows whose prediction
        is empty are skipped.
    :param df: DataFrame with an ``expected_order_indexed`` column holding the
        ground-truth ordering for each row.
    :param weights: Per-position credit used for the weighted accuracy.
    :return: Dict with ``top1_accuracy``, ``average_spearman`` and
        ``average_weighted_accuracy``. All three are averaged over the rows
        that were actually evaluated, and are 0.0 when no row was evaluated.
    """
    # Read both sides by position. The original mixed label-based
    # ``predictions[i]`` with positional ``.iloc[i]``, which misaligns (or
    # raises KeyError) for a Series whose index is not 0..n-1.
    pred_rows = list(predictions)
    truth_rows = list(df["expected_order_indexed"])

    correct_top1 = 0
    spearman_scores, weighted_scores = [], []

    for i in range(len(df)):
        pred_order = pred_rows[i]
        if len(pred_order) == 0:
            continue
        ground_truth_order = truth_rows[i]

        # Top-1 accuracy
        if pred_order[0] == ground_truth_order[0]:
            correct_top1 += 1

        # Spearman correlation
        # NOTE(review): this correlates the two orderings position by position;
        # correlating each image's rank in both orderings can give a different
        # value -- confirm which one the task metric expects.
        score, _ = spearmanr(pred_order, ground_truth_order)
        spearman_scores.append(score)

        # Weighted accuracy: credit weights[j] when position j holds the right image
        weighted_scores.append(
            sum(w for w, pred, truth in zip(weights, pred_order, ground_truth_order) if pred == truth)
        )

    evaluated = len(spearman_scores)
    if evaluated == 0:
        # Nothing to average; avoid ZeroDivisionError
        return {
            "top1_accuracy": 0.0,
            "average_spearman": 0.0,
            "average_weighted_accuracy": 0.0,
        }

    # Same denominator for all three metrics (previously top-1 used len(df))
    return {
        "top1_accuracy": correct_top1 / evaluated,
        "average_spearman": sum(spearman_scores) / evaluated,
        "average_weighted_accuracy": sum(weighted_scores) / evaluated,
    }

def encode_text(text):
    """
    Return the L2-normalized CLIP text embedding for a single string.

    The string is tokenized with ``truncate=True`` and encoded by the
    module-level ``model`` on ``device`` with gradients disabled.
    """
    tokens = clip.tokenize([text], truncate=True)
    tokens = tokens.to(device)
    with torch.no_grad():
        embedding = model.encode_text(tokens)
    # Scale to unit length along the feature axis
    norm = embedding.norm(dim=-1, keepdim=True)
    return embedding / norm

def rank_images_by_caption_similarity(sentence, captions):
    """
    Order images by how similar each caption is to the sentence.

    :param sentence: The input sentence for the row.
    :param captions: List of captions, one per image.
    :return: Tuple ``(ranked_indices, similarities)``: caption indices sorted
        from most to least similar, and the cosine similarity of each caption
        in the original caption order.
    """
    query = encode_text(sentence)

    # Cosine similarity between the sentence and each caption, in input order
    similarities = []
    for caption in captions:
        caption_vec = encode_text(caption)
        similarities.append(torch.cosine_similarity(query, caption_vec, dim=-1).item())

    # Highest similarity first
    by_score = sorted(enumerate(similarities), key=lambda pair: pair[1], reverse=True)
    ranked_indices = [index for index, _ in by_score]
    return ranked_indices, similarities

def save_predictions(df, predictions, similarities, evaluation_metrics,
                     preds_file="predictions_with_similarities_1.csv",
                     metrics_file="evaluation_metrics_1.txt"):
    """
    Write per-example predictions and the aggregate metrics to disk.

    :param df: DataFrame providing the ``compound`` and
        ``expected_order_indexed`` columns, read by position.
    :param predictions: One predicted ordering per row, aligned with ``df`` by position.
    :param similarities: One list of similarity scores per row, same alignment.
    :param evaluation_metrics: Mapping of metric name to numeric value.
    :param preds_file: CSV path for the per-example rows (default unchanged).
    :param metrics_file: Text path for the ``name: value`` metric lines (default unchanged).
    """
    compounds = df["compound"]
    ground_truth = df["expected_order_indexed"]

    # One CSV row per example: index, compound, ground truth, prediction, scores
    with open(preds_file, mode="w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["index", "compound", "ground_truth_order", "predicted_order", "similarity_scores"])
        for i, (pred_order, sim) in enumerate(zip(predictions, similarities)):
            writer.writerow([i, compounds.iloc[i], ground_truth.iloc[i], pred_order, list(sim)])

    print(f"Predictions saved to {preds_file}")

    # One "metric: value" line per metric, four decimal places
    with open(metrics_file, mode="w") as f:
        for metric, value in evaluation_metrics.items():
            f.write(f"{metric}: {value:.4f}\n")

    print(f"Evaluation metrics saved to {metrics_file}")

def summarize_captions(captions, max_tokens=77):
    """Summarize each caption with the module-level ``summarizer`` pipeline.

    :param captions: Iterable of caption strings.
    :param max_tokens: Token budget quoted in the prompt and passed to the
        pipeline as ``max_new_tokens``.
    :return: List of single-line summaries, in the same order as ``captions``.
    """
    # Boilerplate lead-in to strip from the generated text. It is built from
    # max_tokens so it stays in step with the prompt instead of hard-coding "77".
    # NOTE(review): presumably this is the lead-in observed in earlier runs -- confirm.
    prefix = f"Here is a summary of the image in {max_tokens} tokens or less:"

    summarized_captions = []
    for caption in captions:
        prompt = [{"role": "user", "content": f"Summarize this to fit within {max_tokens} tokens: {caption}"}]
        generated = summarizer(
            prompt,
            max_new_tokens=max_tokens,  # Limit the output length
            return_full_text=False,
        )[0]["generated_text"]

        # Collapse newlines and whitespace runs to single spaces; deleting "\n"
        # outright glued the last word of one line to the first of the next.
        summarized = " ".join(generated.split())
        if summarized.startswith(prefix):
            summarized = summarized[len(prefix):].lstrip()
        summarized_captions.append(summarized)
    return summarized_captions

# Experiment 2 driver: rank images by similarity between the paraphrased
# compound meaning and LLM-summarized captions.
# The DataFrame needs columns "sentence", "image1_caption", ..., "image5_caption".
# NOTE(review): earlier cells read this TSV from "train/"; confirm this path exists in the runtime.
df = pd.read_csv("subtaska_dataset/subtask_a_train.tsv", delimiter="\t")

df_meaning = pd.read_csv("train_df_with_paraphrased.csv")
# .copy() so the column assignment below writes to an independent frame rather
# than a slice of the loaded one (avoids SettingWithCopyWarning).
df_meaning = df_meaning[['compound', 'sentence', 'sentence_type', 'paraphrased_nc']].copy()
df_meaning['paraphrased_nc'] = (
    "The compound " + df_meaning["compound"] + " means " + df_meaning['paraphrased_nc']
).str.lower()

# NOTE(review): an inner merge drops compounds without a paraphrase and repeats
# rows if a compound appears more than once in df_meaning -- verify it is unique.
df = pd.merge(df, df_meaning[['compound', 'paraphrased_nc']], on='compound', how='inner')

# Process each row to rank images based on caption similarity to the paraphrase
all_rankings = []
all_similarities = []
expected_row_indices = []

for _, row in df.iterrows():
    # The paraphrased compound meaning replaces the raw sentence as the query text
    sentence = row['paraphrased_nc']
    captions = [row[f"image{i}_caption"] for i in range(1, 6)]

    # Map each image file name to its 0-based slot (image1 -> 0, ..., image5 -> 4)
    name_to_slot = {row[f"image{i}_name"]: i - 1 for i in range(1, 6)}
    expected_order = [name_to_slot[name] for name in literal_eval(row['expected_order'])]
    expected_row_indices.append(expected_order)

    # Captions are summarized by the LLM instead of stopword-filtered
    captions = summarize_captions(captions)

    # Rank images by similarity to the paraphrase
    rankings, similarities = rank_images_by_caption_similarity(sentence, captions)
    all_rankings.append(rankings)
    all_similarities.append(similarities)

df["expected_order_indexed"] = expected_row_indices
df["ranked_images"] = all_rankings
df["similarities"] = all_similarities

evaluation_metrics = evaluate_predictions(df['ranked_images'], df)
save_predictions(df, df['ranked_images'], df["similarities"], evaluation_metrics)

# Save results to a CSV (optional)
df.to_csv("ranked_images_based_on_captions_1.csv", index=False)

print("\nResults for Experiment 2:")
print(f"Top-1 Accuracy: {evaluation_metrics['top1_accuracy'] * 100:.2f}%")
print(f"Average Spearman Correlation: {evaluation_metrics['average_spearman']:.2f}")
print(f"Average Weighted Accuracy: {evaluation_metrics['average_weighted_accuracy']:.2f}")


**Experiment 3: Augmenting the training set with FLUTE**

In [None]:
import pandas as pd
df_meaning = pd.read_csv("train_df_with_paraphrased.csv")

In [None]:
df_meaning.columns

Index(['compound', 'subset', 'sentence_type', 'sentence', 'expected_order',
       'image1_name', 'image1_caption', 'image2_name', 'image2_caption',
       'image3_name', 'image3_caption', 'image4_name', 'image4_caption',
       'image5_name', 'image5_caption', 'compound_dir', 'image_paths',
       'image_label_map', 'expected_order_readable', 'paraphrased_nc'],
      dtype='object')

In [None]:
# Keep only the columns needed below. .copy() makes this an independent frame,
# so the later assignment to 'paraphrased_nc' does not write into a slice
# (which triggers SettingWithCopyWarning).
df_meaning = df_meaning[['compound', 'sentence', 'sentence_type', 'paraphrased_nc']].copy()

In [None]:
# Rewrite each paraphrase as "the compound <compound> means <paraphrase>", lower-cased.
# NOTE: this reads and overwrites the same column, so re-running the cell
# prepends the prefix a second time.
df_meaning['paraphrased_nc'] = (
    "The compound " + df_meaning["compound"] + " means " + df_meaning['paraphrased_nc']
).apply(lambda text: text.lower())


In [None]:
print(df_meaning.iloc[0])

compound                                               elbow grease
sentence          It took a lot of elbow grease to get the old e...
sentence_type                                             idiomatic
paraphrased_nc    the compound elbow grease means manual labor o...
Name: 0, dtype: object


In [None]:
print(df_meaning["sentence_type"])

0     idiomatic
1     idiomatic
2     idiomatic
3     idiomatic
4     idiomatic
        ...    
65      literal
66    idiomatic
67    idiomatic
68      literal
69      literal
Name: sentence_type, Length: 70, dtype: object


In [None]:
import pandas as pd

# Toy frames illustrating the merge. They get their own names so this cell
# does not overwrite the real `df` / `df_meaning` that later cells read.
example_df = pd.DataFrame({
    'compound': ['elbow grease', 'night owl', 'spill the beans'],
    'sentence': ['It took a lot of elbow grease.', 'The night owl stayed awake.', 'He spilled the beans.']
})

example_meaning = pd.DataFrame({
    'compound': ['elbow grease', 'night owl', 'spill the beans'],
    'paraphrased_nc': ['manual effort', 'a person who stays up late', 'to reveal a secret']
})

# Merge DataFrames based on the 'compound' column
df_combined = pd.merge(example_df, example_meaning[['compound', 'paraphrased_nc']], on='compound', how='inner')


# Optionally stack or align as needed
# Example: Creating a stacked column view
# df_combined['stacked_info'] = df_combined['sentence'] + " | " + df_combined['paraphrased_nc']


df_combined.head()

Unnamed: 0,compound,sentence,paraphrased_nc
0,elbow grease,It took a lot of elbow grease.,manual effort
1,night owl,The night owl stayed awake.,a person who stays up late
2,spill the beans,He spilled the beans.,to reveal a secret


**FLUTE Dataset**

In [None]:
import pyarrow.parquet as pq
# Absolute path inside the runtime's /content directory.
# NOTE(review): the notebook does not show how this file gets there -- confirm its source.
file_path = '/content/query_result.parquet'
# Read the Parquet file into a pyarrow Table, then convert it to a pandas DataFrame.
table = pq.read_table(file_path)
df_flute = table.to_pandas()

Unnamed: 0,id,hypothesis,premise,idiom,label,explanation,split,type
0,10,She gets to waste her days sucking back tequil...,She gets to waste her days sucking back tequil...,bring home the bacon,Contradiction,To bring home the bacon means to earn a salary...,train,Idiom
1,15,"It's the same with bardem, only to the nth deg...","It's the same with bardem, only even more so.",to the nth degree,Entailment,To the nth degree means to the highest level o...,train,Idiom
2,16,I will not allow you to kill the girl in cold ...,I will not allow you to kill the girl intentio...,in cold blood,Entailment,To kill in cold blood means to kill without fe...,train,Idiom
3,18,Were you in cahoots with this pole cat?,Were you cooperating closely and secretively w...,in cahoots,Entailment,"To be in cahoots is to be in close, often secr...",train,Idiom
4,22,"I know ty's not all there,nick said, tapping h...","I know ty's not mentally adequate, nick said, ...",not all there,Entailment,To be not all there means to not be mentally a...,train,Idiom


In [None]:
from google.colab import sheets
sheet = sheets.InteractiveSheet(df=df_flute)

https://docs.google.com/spreadsheets/d/1sbEad9YfW5PGDWtGeBWYJx_pAY33YQgGdw7-DWRws4w#gid=0


In [None]:
df.head()

Unnamed: 0,compound,subset,sentence_type,sentence,expected_order,image1_name,image1_caption,image2_name,image2_caption,image3_name,image3_caption,image4_name,image4_caption,image5_name,image5_caption
0,elbow grease,Train,idiomatic,It took a lot of elbow grease to get the old e...,"['35234427395.png', '53378381715.png', '399382...",35234427395.png,The image depicts a hand holding a sponge and ...,39938261459.png,The image depicts a hand wearing a yellow work...,53378381715.png,The image depicts a hand holding a duster with...,54879908369.png,The image depicts a person wearing knee pads a...,74852536462.png,The image depicts a person wearing a black out...
1,night owl,Train,idiomatic,"It's a constant battle for us, as he is a morn...","['61697797701.png', '93189810779.png', '893752...",00982495584.png,The image depicts a nighttime scene with a lar...,61697797701.png,The image depicts a cartoon-style illustration...,89375227504.png,The image depicts a cartoon-style owl perched ...,93189810779.png,The image depicts a cartoon-style illustration...,93541983868.png,"The image depicts a dumbbell, which is a commo..."
2,heart of gold,Train,idiomatic,Even the somewhat seedy failed private eye has...,"['86137977215.png', '78062290185.png', '542405...",54240592941.png,"The image depicts a large, metallic safe with ...",78062290185.png,The image depicts a joyful scene featuring a y...,86137977215.png,The image depicts a cartoon scene where a youn...,90660547751.png,"The image depicts a futuristic, stylized space...",92088849364.png,"The image depicts a stylized, artistic represe..."
3,agony aunt,Sample,idiomatic,ESA's Space Weather Office is like Europe's st...,"['83600499282.png', '57658144685.png', '025128...",02512838127.png,The image depicts a serene outdoor scene featu...,32964421720.png,The image depicts a cartoon-style illustration...,57658144685.png,The image depicts a scene of a person sitting ...,83600499282.png,"The image depicts a person sitting at a desk, ...",92533456778.png,The image depicts a cartoon character of a wom...
4,shrinking violet,Train,idiomatic,"This aged, rich wine is no shrinking violet wi...","['77861539717.png', '68016869942.png', '118443...",11844321898.png,The image depicts a bouquet of purple tulips a...,45394842176.png,"The image depicts a magnifying glass, commonly...",68016869942.png,"The image depicts a small, cute puppy standing...",77861539717.png,The image depicts an animated character with c...,97482048489.png,The image is a colorful and vibrant illustrati...


In [None]:
!pip install datasets

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m28.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl 

In [None]:
from datasets import load_dataset

ds = load_dataset("ColumbiaNLP/FLUTE")

In [None]:
df_flute_dataset = ds['train'].to_pandas()

In [None]:
df_flute_dataset.head()

Unnamed: 0,id,hypothesis,premise,idiom,label,explanation,split,type
0,1,I was gone for only a few days and my consider...,I left my adult son home for a few days and ju...,,Contradiction,Most people would not consider leaving dirty d...,train,Sarcasm
1,2,it was such a pleasant sight to see a guy pick...,I just caught a guy picking up used chewing gu...,,Contradiction,Picking up used chewing gum is really unhygien...,train,Sarcasm
2,3,Their sudden appearance in the farmhouse was l...,You could feel their sudden appearance in the ...,,Entailment,A gust of arctic wind is an icy blast that is ...,train,Simile
3,4,"April's had never left, like a toothpaste comm...","April's had never left, perfectly white and fu...",,Entailment,A toothpaste commercial is something that is a...,train,Simile
4,5,I feel terrible that the meal I cooked for my ...,I cooked a meal for family and it came out hor...,,Entailment,It is often very embarrassing when someone coo...,train,CreativeParaphrase


In [None]:
df_flute_dataset['type'].unique()

array(['Sarcasm', 'Simile', 'CreativeParaphrase', 'Metaphor', 'Idiom'],
      dtype=object)

In [None]:
# Convert df_flute_dataset['idiom'] to a set for faster membership checks
idiom_set = set(df_flute_dataset['idiom'])

# Compounds from df that appear verbatim (exact string match) among the FLUTE idioms
matched_compounds = [compound for compound in df['compound'] if compound in idiom_set]
count = len(matched_compounds)

# Print the count and matched compounds; the message names the frame actually used
print(f"Count of idiomatic compounds present in df_flute_dataset['idiom']: {count}")
print("Matched Compounds:")
print(matched_compounds)


Count of idiomatic compounds present in df_flute['idiom']: 4
Matched Compounds:
['shrinking violet', 'white hat', 'two-way street', 'chicken feed']


In [None]:
print(count)

0
