In [None]:
import os

from elasticsearch import Elasticsearch

# Connection settings are read from the environment so that real credentials
# never have to be committed with the notebook. The fallbacks are the values
# that were previously hard-coded here, so behavior is unchanged when the
# variables are not set.
es_client = Elasticsearch(
    os.environ.get("ELASTICSEARCH_URL", "http://localhost:9200"),
    basic_auth=(
        os.environ.get("ELASTIC_USERNAME", "elastic"),
        os.environ.get("ELASTIC_PASSWORD", "changeme"),
    ),
)

from elasticsearch.client import IndicesClient

# Index-management API bound to the client created above.
es_index_client = IndicesClient(es_client)

# Name of the index created by this cell (the CSV loader in this file
# bulk-indexes into an index with the same name).
MOVIES_INDEX = "movies"

# Settings and mappings sent as the body of the create-index request.
configurations = {
    "settings": {
        # NOTE(review): replicas can only be allocated on other nodes, so on a
        # single-node cluster this index will report "yellow" — confirm that
        # 2 replicas is intended.
        "index": {"number_of_replicas": 2},
        "analysis": {
            "filter": {
                # Emits the leading 2..15 characters of each token
                # (prefix / search-as-you-type matching).
                "ngram_filter": {
                    "type": "edge_ngram",
                    "min_gram": 2,
                    "max_gram": 15,
                },
            },
            "analyzer": {
                "ngram_analyzer": {
                    "type": "custom",
                    "tokenizer": "standard",
                    "filter": ["lowercase", "ngram_filter"],
                },
            },
        },
    },
    "mappings": {
        "properties": {
            "id": {"type": "long"},
            # --- Fields written by the CSV loader in this file ---------------
            "Title": {
                "type": "text",
                "analyzer": "standard",
                "fields": {
                    # Exact value, for sorting and aggregations.
                    "keyword": {"type": "keyword"},
                    # Prefix matching. The query text is analyzed with the
                    # standard analyzer so it is not itself split into n-grams.
                    "ngrams": {
                        "type": "text",
                        "analyzer": "ngram_analyzer",
                        "search_analyzer": "standard",
                    },
                },
            },
            "Overview": {"type": "text"},
            # The loader sends every CSV value as a string. Numeric and date
            # fields coerce well-formed strings; `ignore_malformed` keeps a
            # document whose value cannot be parsed instead of rejecting it.
            "Release_Date": {"type": "date", "ignore_malformed": True},
            "Popularity": {"type": "float", "ignore_malformed": True},
            "Vote_Count": {"type": "integer", "ignore_malformed": True},
            "Vote_Average": {"type": "float", "ignore_malformed": True},
            "Original_Language": {"type": "keyword"},
            "Genre": {
                "type": "text",
                "fields": {
                    "keyword": {"type": "keyword"},
                },
            },
            "Poster_Url": {"type": "keyword"},
            # --- Pre-existing fields -----------------------------------------
            # NOTE(review): the CSV loader in this file does not populate any
            # of the fields below; they are kept unchanged in case other cells
            # query them — confirm and remove if unused.
            "name": {
                "type": "text",
                "analyzer": "standard",
                "fields": {
                    "keyword": {"type": "keyword"},
                    "ngrams": {"type": "text", "analyzer": "ngram_analyzer"},
                },
            },
            "brand": {
                "type": "text",
                "fields": {
                    "keyword": {"type": "keyword"},
                },
            },
            "price": {"type": "float"},
            "attributes": {
                "type": "nested",
                "properties": {
                    "attribute_name": {"type": "text"},
                    "attribute_value": {"type": "text"},
                },
            },
        }
    },
}

# The index persists in Elasticsearch across kernel restarts, so an
# unconditional create fails the second time the notebook is run.
# An already existing index is left untouched: delete it manually if the
# settings or mappings above have changed and must be re-applied.
if not es_index_client.exists(index=MOVIES_INDEX):
    es_index_client.create(index=MOVIES_INDEX, body=configurations)

In [None]:
import csv
import json

# Column names, in file order, applied to every CSV row (the file's own header
# row is skipped below).
columns = [
    "id",
    "Release_Date",
    "Title",
    "Overview",
    "Popularity",
    "Vote_Count",
    "Vote_Average",
    "Original_Language",
    "Genre",
    "Poster_Url",
]
# Original (misspelled) name, kept so existing references keep working.
colums = columns
index_name = "movies"

# NOTE(review): machine-specific absolute path — consider making it configurable.
movies_csv_path = "/Users/titouanhoude/Documents/GitHub/ELK-SISE/movies.csv"
# Despite the .csv extension, this file receives the bulk payload, i.e.
# newline-delimited JSON (one action line followed by one document line).
bulk_payload_path = "movies_to_elastic_format.csv"

# newline="" lets the csv module handle line endings itself, which is required
# for quoted fields that contain line breaks to be parsed correctly.
with open(movies_csv_path, "r", newline="", encoding="utf-8") as fi:
    reader = csv.DictReader(
        fi, fieldnames=columns, delimiter=",", quotechar='"'
    )

    # Skip the header row; the default avoids StopIteration on an empty file.
    next(reader, None)

    actions = []
    for row in reader:
        # Action line: index the document under the CSV id.
        action = {"index": {"_index": index_name, "_id": row["id"]}}
        # Document line: only the known columns, in their declared order.
        doc = {column: row[column] for column in columns}
        actions.append(json.dumps(action))
        actions.append(json.dumps(doc))

# The input file is no longer needed here, so it is closed before the payload
# is written and sent.
if actions:
    # A bulk payload must be terminated by a newline.
    payload = "\n".join(actions) + "\n"

    with open(bulk_payload_path, "w", encoding="utf-8") as fo:
        fo.write(payload)

    response = es_client.bulk(body=payload)

    # A bulk request can succeed as a whole while individual documents are
    # rejected; report those instead of dropping them silently.
    if response["errors"]:
        failed = [
            item["index"]
            for item in response["items"]
            if "error" in item.get("index", {})
        ]
        print(
            f"{len(failed)} of {len(response['items'])} documents "
            "failed to index; first errors:"
        )
        for result in failed[:5]:
            print(f"  _id={result.get('_id')}: {result['error']}")
else:
    print(f"No data rows found in {movies_csv_path}; nothing was indexed.")