In [10]:
import pandas as pd
from langdetect import detect, detect_langs
from deep_translator import GoogleTranslator

In [2]:
df = pd.read_csv("contestants_with_spotify.csv")
df.head

<bound method NDFrame.head of       year to_country_id   to_country                     artist  \
0     1956            ch  Switzerland                  Lys Assia   
1     1956            nl  Netherlands                Jetty Paerl   
2     1956            be      Belgium                Fud Leclerc   
3     1956            de      Germany     Walter Andreas Schwarz   
4     1956            fr       France               Mathé Altéry   
...    ...           ...          ...                        ...   
1729  2023            lv       Latvia              Sudden Lights   
1730  2023            ie      Ireland                 Wild Youth   
1731  2023            nl  Netherlands  Mia Nicolai & Dion Cooper   
1732  2023            az   Azerbaijan                TuralTuranX   
1733  2023            mt        Malta                 The Busker   

                                 song  place_contest  sf_num  running_final  \
0                             Refrain            2.0     NaN            2.

In [3]:
summary = (
    df.groupby("year")
      .agg(
          max_running_final=("running_final", "max"),
          count_entries=("running_final", "count")
      )
      .reset_index()
)
summary["max_not_equal_count"] = summary["max_running_final"] != summary["count_entries"]

print(summary)

    year  max_running_final  count_entries  max_not_equal_count
0   1956               14.0             12                 True
1   1957               10.0             10                False
2   1958               10.0             10                False
3   1959               11.0             11                False
4   1960               13.0             13                False
..   ...                ...            ...                  ...
63  2019               26.0             26                False
64  2020               23.0              1                 True
65  2021               26.0             26                False
66  2022               25.0             25                False
67  2023               26.0             26                False

[68 rows x 4 columns]


In [4]:
# Filter rows where max_not_equal_count is True
filtered = summary[summary["max_not_equal_count"]]

print(filtered)

    year  max_running_final  count_entries  max_not_equal_count
0   1956               14.0             12                 True
64  2020               23.0              1                 True


In [5]:
def safe_detect(text):
    try:
        if pd.isna(text):   # skip NaN
            return None
        return detect(str(text))  # force to string
    except Exception:
        return None  # fallback if detection fails

df["language"] = df["lyrics"].apply(safe_detect)

df.head(10)

Unnamed: 0,year,to_country_id,to_country,artist,song,place_contest,sf_num,running_final,running_sf,place_final,...,lyricists,lyrics,youtube_url,spotify_track_id,spotify_track_name,spotify_album_name,spotify_album_release_date,spotify_track_popularity,spotify_url,language
0,1956,ch,Switzerland,Lys Assia,Refrain,2.0,,2.0,,2.0,...,,"(Refrain d'amour...)\n\nRefrain, couleur du ci...",https://youtube.com/watch?v=IyqIPvOkiRk,0OtKMBmhEuQ4hGPbMmtErz,Refrain,Schwarze Perlen,17/12/1953,11.0,https://open.spotify.com/track/0OtKMBmhEuQ4hGP...,fr
1,1956,nl,Netherlands,Jetty Paerl,De Vogels Van Holland,2.0,,1.0,,2.0,...,Annie M. G. Schmidt,De vogels van Holland zijn zo muzikaal\nZe ler...,https://youtube.com/watch?v=u45UQVGRVPA,0TwQ2TgfJxZbyeQvCzeMKY,De Vogels Van Holland,Europop (Legends Edition),19/03/2019,0.0,https://open.spotify.com/track/0TwQ2TgfJxZbyeQ...,nl
2,1956,be,Belgium,Fud Leclerc,Messieurs Les Noyés De La Seine,2.0,,3.0,,2.0,...,Robert Montal,Messieurs les noyés de la Seine\nOuvrez-moi le...,https://youtube.com/watch?v=U9O3sqlyra0,,,,,,,fr
3,1956,de,Germany,Walter Andreas Schwarz,Im Wartesaal Zum Großen Glück,2.0,,4.0,,2.0,...,,"Es gibt einen Hafen, da fährt kaum ein Schiff\...",https://youtube.com/watch?v=BDNARIDnmTc,6GEjmkRXCf0WCWptS9sRGb,Im Wartesaal zum großen Glück,Germany 12 Points 2005 Countdown Grand Prix,04/03/2005,4.0,https://open.spotify.com/track/6GEjmkRXCf0WCWp...,de
4,1956,fr,France,Mathé Altéry,Le Temps Perdu,2.0,,5.0,,2.0,...,Rachèle Thoreau,"Chante, carillon\nLe chant du temps perdu\nCha...",https://youtube.com/watch?v=dm1L0XyikKI,,,,,,,fr
5,1956,lu,Luxembourg,Michèle Arnaud,Ne Crois Pas,2.0,,13.0,,2.0,...,,Si on te dit qu't'a une belle gueule\nY a pas ...,https://youtube.com/watch?v=Pv7GJkqtNuc,6zbFKX74gD44REVhScTvBP,Ne Crois Pas,Vintage French Song No. 142 - EP: Les Amours O...,17/08/1956,0.0,https://open.spotify.com/track/6zbFKX74gD44REV...,fr
6,1956,it,Italy,Franca Raimondi,Aprite Le Finestre,2.0,,7.0,,2.0,...,Pinchi,La prima rosa rossa è già sbocciata\nE nascon ...,https://youtube.com/watch?v=HHA1mC2RkKc,2qlN0HtvDVR2c3f1TGT0bb,Aprite le finestre (remastered),El Festival De San Remo - Los Años De Oro (195...,01/01/2006,16.0,https://open.spotify.com/track/2qlN0HtvDVR2c3f...,it
7,1956,nl,Netherlands,Corry Brokken,Voorgoed Voorbij,2.0,,8.0,,2.0,...,,Voorgoed voorbij\nNu zijn we nooit meer samen\...,https://youtube.com/watch?v=6OjNzLaifFM,6LQLzhnZQqyPAp2H6Cszuz,Voorgoed Voorbij,Net Als Toen,20/09/2001,0.0,https://open.spotify.com/track/6LQLzhnZQqyPAp2...,nl
8,1956,be,Belgium,Mony Marc,Le Plus Beau Jour De Ma Vie,2.0,,10.0,,2.0,...,David Bee,Les cloches sonnent\nTout carillonne\nLe plus ...,https://youtube.com/watch?v=dL139DezFwo,,,,,,,fr
9,1956,de,Germany,Freddy Quinn,So Geht Das Jede Nacht,2.0,,11.0,,2.0,...,Peter Mösser,"Am Sonntag mit Jimmy, am Montag mit Jack\nAm D...",https://youtube.com/watch?v=DJFGwPVKfyk,3m57DgqNNc2MRfKr38hWPb,So Geht Das Jede Nacht,Schlager Hits Der 50er,19/02/2010,1.0,https://open.spotify.com/track/3m57DgqNNc2MRfK...,de


In [6]:
#print(df.language.unique())
langsummary = (
    df.groupby("language")
      .agg(
          count_entries=("song", "count")
      )
    .reset_index()
    .sort_values(by="count_entries", ascending=False)
)
print(langsummary)

   language  count_entries
5        en            776
9        fr            173
4        de             91
10       hr             86
13       it             71
6        es             63
16       nl             56
19       pt             51
23       sl             45
17       no             45
8        fi             39
26       sv             37
3        da             29
29       tr             28
7        et             23
11       hu             13
25       sq             13
28       tl             12
18       pl             10
20       ro             10
12       id             10
0        af              9
22       sk              9
1        ca              4
24       so              4
14       lt              4
27       sw              4
2        cs              2
21       ru              1
15       lv              1


In [12]:
df["lyrics_en"] = df["lyrics"]

mask = (df["language"].notna()) & (df["language"] != "en")

def safe_translate(text):
    if pd.isna(text):
        return None
    try:
        return GoogleTranslator(source="auto", target="en").translate(str(text))
    except Exception as e:
        return f"[Translation error: {e}]"

df.loc[mask, "lyrics_en"] = df.loc[mask, "lyrics"].apply(safe_translate)

print(df.head())

   year to_country_id   to_country                  artist  \
0  1956            ch  Switzerland               Lys Assia   
1  1956            nl  Netherlands             Jetty Paerl   
2  1956            be      Belgium             Fud Leclerc   
3  1956            de      Germany  Walter Andreas Schwarz   
4  1956            fr       France            Mathé Altéry   

                              song  place_contest  sf_num  running_final  \
0                          Refrain            2.0     NaN            2.0   
1            De Vogels Van Holland            2.0     NaN            1.0   
2  Messieurs Les Noyés De La Seine            2.0     NaN            3.0   
3    Im Wartesaal Zum Großen Glück            2.0     NaN            4.0   
4                   Le Temps Perdu            2.0     NaN            5.0   

   running_sf  place_final  ...  \
0         NaN          2.0  ...   
1         NaN          2.0  ...   
2         NaN          2.0  ...   
3         NaN          2.0  ..

In [13]:
df.to_csv("translatedlyrics.csv", index=False)
