### Importing Necessary Libraries

In [3]:
import pandas as pd
from elasticsearch import helpers
from elasticsearch.helpers import streaming_bulk
from elasticsearch import Elasticsearch

In [4]:
df = pd.read_csv("ONE PIECE.csv")
df.columns

Index(['Unnamed: 0', 'rank', 'trend', 'season', 'episode', 'name', 'start',
       'total_votes', 'average_rating'],
      dtype='object')

In [12]:
df = df.drop("Unnamed: 0", axis=1)
df

Unnamed: 0,rank,trend,season,episode,name,start,total_votes,average_rating
0,24129,18,1,1,I'm Luffy! The Man Who Will Become the Pirate ...,1999,647,7.6
1,29290,11,1,2,"The Great Swordsman Appears! Pirate Hunter, Ro...",1999,473,7.8
2,32043,7,1,3,Morgan vs. Luffy! Who's This Beautiful Young G...,1999,428,7.7
3,28818,8,1,4,Luffy's Past! The Red-haired Shanks Appears!,1999,449,8.1
4,37113,4,1,5,"Fear, Mysterious Power! Pirate Clown Captain B...",1999,370,7.5
...,...,...,...,...,...,...,...,...
953,41448,26,1,954,Its Name is Enma! Oden's Meito!,2020,302,7.7
954,35342,44,1,955,&quot;A New Alliance?! Kaido's Army Gathers&quot;,2020,407,7.4
955,33715,75,1,956,Ticking Down to the Great Battle! The Straw Ha...,2020,353,8.2
956,2940,964,1,957,Big News! The Warlords Attack Incident,2021,2862,9.1


In [13]:
df.columns

Index(['rank', 'trend', 'season', 'episode', 'name', 'start', 'total_votes',
       'average_rating'],
      dtype='object')

In [4]:
df.info

<bound method DataFrame.info of        rank trend  season  episode  \
0    24,129    18       1        1   
1    29,290    11       1        2   
2    32,043     7       1        3   
3    28,818     8       1        4   
4    37,113     4       1        5   
..      ...   ...     ...      ...   
953  41,448    26       1      954   
954  35,342    44       1      955   
955  33,715    75       1      956   
956   2,940   964       1      957   
957  14,751     -       1      958   

                                                  name  start total_votes  \
0    I'm Luffy! The Man Who Will Become the Pirate ...   1999         647   
1    The Great Swordsman Appears! Pirate Hunter, Ro...   1999         473   
2    Morgan vs. Luffy! Who's This Beautiful Young G...   1999         428   
3         Luffy's Past! The Red-haired Shanks Appears!   1999         449   
4    Fear, Mysterious Power! Pirate Clown Captain B...   1999         370   
..                                               

In [5]:
df.isnull().sum()

rank              0
trend             0
season            0
episode           0
name              0
start             0
total_votes       0
average_rating    0
dtype: int64

### Creating Connections

In [7]:
es = Elasticsearch('http://localhost:9200')
print(es.ping())

True




In [8]:
def data_generator(df):
    for _, row in df.iterrows():
        yield {
            "_index": 'one_piece',
            "_source": {
                "rank": row['rank'],
                "trend": row['trend'],
                "season":row['season'],
                "episode":row['episode'],
                "name":row['name'],
                "start":row['start'],
                "total_votes":row['total_votes'],
                "average_rating":row['average_rating']
            }
        }

In [9]:
# Using the streaming_bulk() helper function to bulk insert the data into Elasticsearch
for success, info in streaming_bulk(client=es, actions=data_generator(df)):
    if not success:
        print('A document failed:', info)

In [22]:
def create_indices():
    es_client = Elasticsearch('http://localhost:9200', verify_certs=False)
    es_client.ping()
    config = {
        "mappings": {
                "properties": {
                    "rank" : {
                    "type": "text"
                    },
                    "trend" :{
                    "type": "text"
                    },
                    "season":{
                    "type": "text"
                    },
                    "episode":{
                    "type": "text"
                    },
                    "name":{
                    "type": "text"
                    },
                    "start":{
                    "type": "text"
                    },
                    "total_votes":{
                    "type": "text"
                    }, 
                    "average_rating":{
                    "type": "text"
                    }
                }
        },
        "settings": {
            "number_of_shards": 2,
            "number_of_replicas": 1
        }
    }

    es_client.indices.create(
        index="one_piece",
        settings=config["settings"],
        mappings=config["mappings"],
    )
    #Loading Excel File
    def reco_data_generator(df):
        for _, row in df.iterrows():
            yield {
            "_index": 'one_piece',
            "_source": {
                "rank": row['rank'],
                "trend": row['trend'],
                "season":row['season'],
                "episode":row['episode'],
                "name":row['name'],
                "start":row['start'],
                "total_votes":row['total_votes'],
                "average_rating":row['average_rating']
            }
            }
    # Using the streaming_bulk() helper function to bulk insert the data into Elasticsearch
    for success, info in streaming_bulk(client=es_client, actions=reco_data_generator(df)):
        if not success:
            print('A document failed:', info)

In [23]:
create_indices()

  This is separate from the ipykernel package so we can avoid doing imports until
