# 1. Importing libraries

## 1.1 Installing packages

In [5]:
!pip install translate

Collecting translate
  Downloading translate-3.6.1-py2.py3-none-any.whl (12 kB)
Collecting libretranslatepy==2.1.1 (from translate)
  Downloading libretranslatepy-2.1.1-py3-none-any.whl (3.2 kB)
Installing collected packages: libretranslatepy, translate
Successfully installed libretranslatepy-2.1.1 translate-3.6.1


In [6]:
!pip install deepl

Collecting deepl
  Downloading deepl-1.18.0-py3-none-any.whl (35 kB)
Installing collected packages: deepl
Successfully installed deepl-1.18.0


## 1.2 Adding source and path to find files (Google Colab)

In [2]:
import os
from google.colab import drive

In [3]:
# Mount Google Drive into the Colab VM, then make the project data folder the
# working directory so that later cells can refer to data files by bare file name.
drive.mount('/content/drive')
path = '/content/drive/MyDrive/Voice of Nature/Data/'
os.chdir(path)

Mounted at /content/drive












# 2. Loading data

## 2.1 Libraries and functions

In [4]:
import pandas as pd
import numpy as np
import re

In [7]:
# translation
from translate import Translator
import deepl
# The DeepL auth key is read from the DEEPL_AUTH_KEY environment variable so the
# secret never has to be pasted into (and shared together with) the notebook.
deepl_auth_key = os.environ.get('DEEPL_AUTH_KEY')
if not deepl_auth_key:
  raise RuntimeError('Set the DEEPL_AUTH_KEY environment variable to your DeepL API key before running this cell.')
translator = deepl.Translator(deepl_auth_key)

In [None]:
def replace(text, replacements=None, lower=True):
    '''Replaces multiple substrings defined in a dictionary in textual data.

    Parameters
    ----------
    text : object
        Value to clean. Non-string values (e.g. numeric column names) are
        converted with ``str()`` before any string operation is applied.
    replacements : dict or None
        Mapping ``{old: new}``. The pairs are applied one after another in the
        dictionary's insertion order, so a later pair sees the result of the
        earlier ones (e.g. ``' ' -> '_'`` followed by ``'__' -> '_'``).
    lower : bool
        If true, the text is lower-cased before the replacements are applied.

    Returns
    -------
    The cleaned string. If ``lower`` is false and no replacement is applied,
    ``text`` is returned unchanged.
    '''
    if lower:
        # coerce first: a non-string value has no .lower()
        text = str(text).lower()

    if replacements is not None:
        for old, new in replacements.items():
            text = str(text).replace(old, new)

    return text

## 2.2 Data loading and preparation

In [None]:
# importing the excel with all sheets - takes 30 sec
# sheet_name=None makes read_excel load every sheet and return a dict of {sheet name: DataFrame}
dfs_import = pd.read_excel('ndhvndata_clean_200923_MMnamedtabs.xlsx', sheet_name=None)

In [None]:
# Normalise the sheet names into identifier-like keys for easier coding.
# The pairs are applied in order, so '__' -> '_' cleans up what the earlier pairs produce.
replacements = {'ø': 'o', '+': '', '-': '_', ' ': '_', '__': '_'}

new_keys = []
for sheet_name in dfs_import:
  new_keys.append(replace(sheet_name, replacements))

# same sheets as dfs_import, stored under the cleaned names
dfs = {new_key: sheet for new_key, sheet in zip(new_keys, dfs_import.values())}

In [None]:
# Sheets of interest: cleaned sheet name -> human readable label.
wishes = {'wild_nature': 'Wild Nature',
          'lawns_flowers': 'Lawns and Flowers',
          'social_areas': 'Social Areas',
          'sports_facilities': 'Sports Facilities',
          'facilities_other': 'Other Facilities',
          'other': 'Other'}

# Every category also has a negated "no_<category>" sheet; these are appended
# after the positive ones, in the same order.
wishes.update({'no_' + key: 'No ' + value for key, value in list(wishes.items())})

wishes

{'wild_nature': 'Wild Nature',
 'lawns_flowers': 'Lawns and Flowers',
 'social_areas': 'Social Areas',
 'sports_facilities': 'Sports Facilities',
 'facilities_other': 'Other Facilities',
 'other': 'Other',
 'no_wild_nature': 'No Wild Nature',
 'no_lawns_flowers': 'No Lawns and Flowers',
 'no_social_areas': 'No Social Areas',
 'no_sports_facilities': 'No Sports Facilities',
 'no_facilities_other': 'No Other Facilities',
 'no_other': 'No Other'}

In [None]:
# The first row of every sheet is empty (it was only used for indexing the data),
# so it is dropped from each sheet of interest.
for sheet in wishes:
  frame = dfs[sheet]
  dfs[sheet] = frame.drop(index=frame.index[0])

In [None]:
# Harmonise the column names of every sheet of interest and keep only the shared columns.
rename_cols = ['lat_long', 'description', 'reason']

for sheet in wishes:
  # lower-case the headers and replace spaces by underscores
  frame = dfs[sheet].rename(columns=lambda col: replace(col, {' ': '_'}))
  # the last three columns get the common names listed in rename_cols
  last_three = dict(zip(frame.columns[-3:], rename_cols))
  frame = frame.rename(columns=last_three)
  dfs[sheet] = frame[['respondent_id', 'language', 'wkt', 'geojson', 'lat_long', 'description', 'reason']]

In [None]:
# nan to empty string in the free-text columns
# fillna returns a new frame that is assigned back to dfs[key]; the chained
# `dfs[key][col].fillna(..., inplace=True)` form may act on a temporary copy
# and does not modify the frame under pandas Copy-on-Write.
for key in wishes.keys():
  dfs[key] = dfs[key].fillna({'description': '', 'reason': ''})

In [None]:
# bringing it all into one dataframe
# Each sheet gets two extra columns:
#   wish     - 1 for a wished-for category, 0 for a "no_<category>" sheet
#   category - the category name without the "no_" prefix
frames = []

for key in wishes.keys():
  # anchor the check at the start of the key so that a category that merely
  # contains "no" somewhere in its name is neither flagged nor truncated
  is_negated = key.startswith('no_')

  df = dfs[key].copy()
  df['wish'] = 0 if is_negated else 1
  df['category'] = key[len('no_'):] if is_negated else key
  frames.append(df)

# a single concat at the end instead of growing the frame inside the loop
wishes_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

## 2.3 Translation - EN to DA

In [None]:
def change_language(text, input_lang='en', target_lang='da'):
    '''Translates text from English to Danish with DeepL and returns a plain string.

    NOTE: a later cell of this notebook defines `change_language` again for the
    opposite direction (DA to EN); whichever definition was executed last is
    the one in effect.

    Parameters
    ----------
    text : str
        Text to translate; it is lower-cased before being sent to DeepL.
    input_lang : str
        Kept for interface compatibility. It is not forwarded to DeepL, so the
        source language is left to DeepL's automatic detection.
    target_lang : str
        DeepL target language code.

    Returns
    -------
    str
        The translated text.
    '''
    result = translator.translate_text(text.lower(), target_lang=target_lang)
    # translate_text returns a TextResult object; only its text is returned so
    # that the DataFrame columns keep holding plain strings
    return result.text

In [None]:
# LAST RUNTIME: 03/03/2024 19:23
# Translate the English answers and write the result to disk, so the translation
# does not have to be repeated - takes ca. 3 min, ca. 63 000 characters
df = wishes_df.copy()

# rows answered in English (alone or together with Danish)
is_english = df['language'].isin(['en', 'en, da'])

for col in ['description', 'reason']:
  answers = df.loc[is_english, col]
  # empty answers are passed through untouched, everything else is translated
  df.loc[is_english, col] = answers.apply(lambda x: x if x == '' else change_language(str(x)))

df.to_csv('wishes_translated.csv', index = False)

## 2.4 Translation - DA to EN

In [None]:
def change_language(text, input_lang='da', target_lang='en-gb'):
    '''Translates text from Danish to English with DeepL and returns plain strings.

    NOTE: this redefines the `change_language` from the EN to DA section above;
    once this cell has run, the earlier definition is no longer in effect.

    Parameters
    ----------
    text : str or list of str
        Text to translate; it is sent to DeepL as is.
    input_lang : str
        Kept for interface compatibility. It is not forwarded to DeepL, so the
        source language is left to DeepL's automatic detection.
    target_lang : str
        DeepL target language code.

    Returns
    -------
    str or list of str
        The translated text (a list when a list of texts was passed in).
    '''
    result = translator.translate_text(text, target_lang=target_lang)
    # translate_text returns TextResult objects; only their text is returned so
    # that the DataFrame columns keep holding plain strings
    if isinstance(result, list):
        return [item.text for item in result]
    return result.text

In [None]:
# LAST RUNTIME: 10/05/2024 12:26
# Translate the Danish answers, starting again from the untranslated wishes_df
# - took ca. 21 min, ca. 302 489 characters
df = wishes_df.copy()

# rows answered in Danish (alone or together with English)
is_danish = df['language'].isin(['da', 'da, en'])

for col in ['description', 'reason']:
  answers = df.loc[is_danish, col]
  # empty answers are passed through untouched, everything else is translated
  df.loc[is_danish, col] = answers.apply(lambda x: x if x == '' else change_language(str(x)))

In [21]:
# Build the final wishes dataset: one free-text `comment` per wish.
final_cols = ['respondent_id', 'wkt', 'category', 'wish', 'comment']

df = (
    df
    # cells that are exactly 'Dyr' are replaced by 'Animals'
    .assign(description=lambda d: d['description'].replace('Dyr', 'Animals'))
    # description and reason are joined by a single space
    .assign(comment=lambda d: d['description']+' '+d['reason'])
    .loc[:, final_cols]
)
df.head(2)

Unnamed: 0,respondent_id,wkt,category,wish,comment
0,72het4dmv4k3,POINT (12.612999 55.717601),wild_nature,1,
1,88pna6cwg8x6,POINT (12.617608 55.725923),wild_nature,1,"Animals The beauty, the calm."


In [22]:
# Write the final English dataset to the working directory; index=False leaves the row index out of the file.
df.to_csv('wishes_translated_en.csv', index=False)

# Next steps are in the codebooks:
**Output data:** 'wishes_translated_en.csv'
 - 3-merge-and-word-embeddings-10-05-2024
 - 4-merged-content-analysis-17-05-2024