# EmoSim508

An Emoji Similarity Baseline Dataset with 508 Emoji Pairs and Similarity Ratings. From https://www.kaggle.com/sanjayaw/emosim508

In [1]:

import "fmt"
import "encoding/json"
import "io/ioutil"
import "strings"

## 1. Define Go structs that mirror the JSON data

In [2]:
// emoji is one side of an emoji pair: two spellings of its Unicode code
// point plus a title.
type emoji struct {
    Unicodelong  string `json:"unicodelong"`  // long escaped form, e.g. \U0001F3B5
    Unicodeshort string `json:"unicodeshort"` // short form, e.g. U+1F3B5
    Title        string `json:"title"`        // e.g. "musical note"
}

// emojiTuple groups the two emojis that are compared with each other.
type emojiTuple struct {
    EmojiOne emoji `json:"emojiOne"` // first emoji of the pair
    EmojiTwo emoji `json:"emojiTwo"` // second emoji of the pair
}

// emojiSimilarityMetrics holds the similarity scores attached to one emoji
// pair. The fields carry no json tags, so encoding/json matches them to JSON
// keys by field name; the names must therefore stay exactly as they are.
type emojiSimilarityMetrics struct {
    Google_Sense_Label  float64
    Twitter_Sense_Def   float64
    Google_Sense_All    float64
    Google_Sense_Def    float64
    Google_Sense_Desc   float64
    Twitter_Sense_All   float64
    Twitter_Sense_Desc  float64
    Twitter_Sense_Label float64

    // Human_Annotator_Agreement is the score the notebook filters on
    // (described there as ranging from 0 to 4, higher meaning more similar).
    Human_Annotator_Agreement float64
}

// emojiSim is one entry of the dataset: an identifier, the two emojis being
// compared, and the similarity scores recorded for them.
type emojiSim struct {
    EmojiPairId         string                 `json:"emojiPairId"`
    EmojiPair           emojiTuple             `json:"emojiPair"`
    EmojiPairSimilarity emojiSimilarityMetrics `json:"emojiPairSimilarity"`
}

## 2. Load the JSON file

In [3]:
// Read the whole dataset file into memory as raw bytes. The path is relative
// to the working directory; err is printed in the next cell.
file, err := ioutil.ReadFile("../data/EmoSim508.json")

In [4]:
// Show the result of reading the file; <nil> means the read succeeded.
fmt.Println(err)

<nil>


6 <nil>

In [5]:
// emojisims receives every decoded entry of the dataset.
var emojisims []emojiSim

In [6]:
// Decode the raw JSON bytes into emojisims, reusing the existing err variable.
err = json.Unmarshal(file, &emojisims)

In [7]:
// Show the result of decoding; <nil> means the JSON was parsed without error.
fmt.Println(err)

<nil>


6 <nil>

In [8]:
// Dump the decoded slice as a quick visual check that the data was loaded.
fmt.Println(emojisims)





65398 <nil>

## 3. Process data

#### Select all the pairs whose Human_Annotator_Agreement is greater than 3.5, meaning that the emojis are very similar (scores range from 0 to 4)


and store only the emoji tuple

In [9]:
// resultPairs collects the emoji tuples whose human agreement score is
// strictly greater than 3.5; the scores themselves are dropped.
var resultPairs []emojiTuple

for i := range emojisims {
    // Take a pointer to the element so the entry is not copied per iteration.
    entry := &emojisims[i]
    if entry.EmojiPairSimilarity.Human_Annotator_Agreement > 3.5 {
        resultPairs = append(resultPairs, entry.EmojiPair)
    }
}

In [10]:
// Report how many pairs passed the filter.
fmt.Println(len(resultPairs))

31


3 <nil>

#### Save the data to a file

In [11]:
// Serialize the selected pairs to JSON. json.Marshal returns the encoded
// bytes, so jsonString is a []byte despite its name.
jsonString, err := json.Marshal(resultPairs)
// Report an encoding failure right away: the cell that writes the file
// reassigns err, which would otherwise hide this error.
if err != nil {
    fmt.Println(err)
}

In [12]:
// jsonString is a []byte; convert it to a string so the JSON text is printed
// rather than a list of byte values.
fmt.Println(string(jsonString))

[{"EmojiOne":{"Unicodelong":"\\U0001F3B5","Unicodeshort":"U+1F3B5","Title":"musical note"},"EmojiTwo":{"Unicodelong":"\\U0001F3B6","Unicodeshort":"U+1F3B6","Title":"musical notes"}},{"EmojiOne":{"Unicodelong":"\\U0001F38A","Unicodeshort":"U+1F38A","Title":"confetti ball"},"EmojiTwo":{"Unicodelong":"\\U0001F389","Unicodeshort":"U+1F389","Title":"party popper"}},{"EmojiOne":{"Unicodelong":"\\U0000263A","Unicodeshort":"U+263A","Title":"smiling face"},"EmojiTwo":{"Unicodelong":"\\U0001F60A","Unicodeshort":"U+1F60A","Title":"smiling face with smiling eyes"}},{"EmojiOne":{"Unicodelong":"\\U00002764","Unicodeshort":"U+2764","Title":"red heart"},"EmojiTwo":{"Unicodelong":"\\U0001F49E","Unicodeshort":"U+1F49E","Title":"revolving hearts"}},{"EmojiOne":{"Unicodelong":"\\U0001F495","Unicodeshort":"U+1F495","Title":"two hearts"},"EmojiTwo":{"Unicodelong":"\\U00002764","Unicodeshort":"U+2764","Title":"red heart"}},{"EmojiOne":{"Unicodelong":"\\U0001F49E","Unicodeshort":"U+1F49E","Title":"revolving h

5607 <nil>

In [13]:
// Write the selected pairs to emojisSim.json in the current working
// directory. The file is plain JSON data, so it is created with mode 0644
// (owner read/write, everyone else read-only) instead of an executable mode.
// The mode only applies when the file does not exist yet.
if err = ioutil.WriteFile("emojisSim.json", jsonString, 0644); err != nil {
    fmt.Println(err)
}