In [35]:
import datatable as dt
import redis
import pandas as pd
import json
from datetime import datetime
from redis.commands.json.path import Path
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
from redisearch import TextField, NumericField, TagField, Query

### Connect to Redis

In [2]:
r = redis.Redis(host='localhost', port=6379, decode_responses=True)

### Load the data into a DataTable object

In [3]:
data = dt.fread("cleaned_data.csv")

### Convert DataTable to a list of dictionaries

In [48]:
df = data.to_pandas()

df = df.head(100)

df = df.drop(columns=['C0'])

df[:5]

Unnamed: 0,key,timestamp,advertisements,address_id,access_address_id,company_name,cvr_no,no_of_units,employees,employees_range,...,y,longitude,latitude,joint_status,date_of_incorporation,homepage,yearly_report_start,yearly_report_end,yearly_result,scrapped
0,2,2022-10-06 11:21:50,1.0,f9c3bc56-7e49-4ce1-b689-1893e35bed57,49953400-9077-4dd1-90d5-f6889aadf746,GRASS DK ApS,41201134,1,\N,\N,...,554204,9.89260622,57.01340725,NORMAL,2020-02-17,-,2021-01-01 00:00:00,2021-12-31 00:00:00,4255,\N
1,3,2022-10-06 11:21:50,1.0,0a3f50c9-d7fb-32b8-e044-0003ba298018,0a3f509c-274c-32b8-e044-0003ba298018,Bomholt Malerfirma ApS,40317430,1,1,ANTAL_1_1,...,556968,9.93806351,57.01048807,NORMAL,2019-03-05,www.bomholtmalerfirma.dk,2020-07-01 00:00:00,2021-06-30 00:00:00,18812,1
2,4,2022-10-06 11:21:50,1.0,0a3f50ca-b735-32b8-e044-0003ba298018,0a3f509c-b65e-32b8-e044-0003ba298018,"BO ANDERSEN HOLDING, SVENSTRUP ApS",33047754,1,\N,\N,...,549430,9.81295876,56.96558049,NORMAL,2010-06-28,-,2021-01-01 00:00:00,2021-12-31 00:00:00,703252,\N
3,5,2022-10-06 11:21:50,,0a3f50ca-260d-32b8-e044-0003ba298018,0a3f509c-58d9-32b8-e044-0003ba298018,Fyenbo Holding ApS,41026162,1,\N,\N,...,555493,9.91473184,57.0494713,NORMAL,2019-12-12,-,2021-01-01 00:00:00,2021-12-31 00:00:00,-52881,\N
4,7,2022-10-06 11:21:50,,0a3f50ca-33af-32b8-e044-0003ba298018,0a3f509c-6287-32b8-e044-0003ba298018,Kjeld Johannesen Holding ApS,37537535,1,\N,\N,...,542364,9.6980118,57.03392186,NORMAL,2016-03-08,-,2021-01-01 00:00:00,2021-12-31 00:00:00,1486249,\N


In [13]:
df.industry_designation_primary.tolist()

['Engroshandel med kemiske produkter',
 'Malerforretninger',
 'Ikke-finansielle holdingselskaber',
 'Ikke-finansielle holdingselskaber',
 'Ikke-finansielle holdingselskaber',
 'Udførelse af gulvbelægninger og vægbeklædning',
 'Virksomhedsrådgivning og anden rådgivning om driftsledelse',
 'Finansielle holdingselskaber',
 'Ikke-finansielle holdingselskaber',
 'Investeringsselskaber',
 'Ikke-finansielle holdingselskaber',
 'Drift af sportsanlæg',
 'Ikke-finansielle holdingselskaber',
 'Teater- og koncertvirksomhed',
 'Forhandlere af sports- og campingudstyr',
 'Ikke-finansielle holdingselskaber',
 'Ikke-finansielle holdingselskaber',
 'Gennemførelse af byggeprojekter',
 'Ikke-finansielle holdingselskaber',
 'Ikke-finansielle holdingselskaber',
 'Ikke-finansielle holdingselskaber',
 'Kombinerede serviceydelser',
 'Almindelig rengøring i bygninger',
 'Ikke-finansielle holdingselskaber',
 'Ikke-finansielle holdingselskaber',
 'Anden udlejning af boliger',
 'Ikke-finansielle holdingselskaber'

### Upload the data to Redis

In [12]:
for _, row in df.iterrows():
    row_id = row['key']  # Assuming 'key' column contains the unique identifier
    row_dict = row.drop('key').to_dict()
    
    # Convert Timestamp values to string
    for key, value in row_dict.items():
        if isinstance(value, pd.Timestamp):
            row_dict[key] = value.strftime('%Y-%m-%d %H:%M:%S')
    
    r.hset(row_id, mapping=row_dict)

print("Data uploaded to Redis successfully.")

Data uploaded to Redis successfully.


### Test the data retrieval from Redis

In [None]:
# Test the data retrieval from Redis
keys = r.keys("*")
for key in keys:
    values = r.hgetall(key)
    print(f"Key: {key}, Values: {values}")

In [20]:
# df.head()
df = df[["key","cvr_no", "company_name","industry_designation_primary"]]
df.head()

Unnamed: 0,key,cvr_no,company_name,industry_designation_primary
0,2,41201134,GRASS DK ApS,Engroshandel med kemiske produkter
1,3,40317430,Bomholt Malerfirma ApS,Malerforretninger
2,4,33047754,"BO ANDERSEN HOLDING, SVENSTRUP ApS",Ikke-finansielle holdingselskaber
3,5,41026162,Fyenbo Holding ApS,Ikke-finansielle holdingselskaber
4,7,37537535,Kjeld Johannesen Holding ApS,Ikke-finansielle holdingselskaber


### Define a schema

In [50]:
schema = (
    TextField("$.key", as_name="key"),
    TextField("$.cvr_no", as_name="cvr_no"),
    TextField("$.company_name", as_name="company_name"),
    TextField("$.industry_designation_primary", as_name="industry_designation_primary")
)

### Set index name

In [69]:
index_name = "idx:cleaned_data"

### Create an index

In [70]:
rs = r.ft(index_name)
rs.create_index(
    schema,
    definition=IndexDefinition(
        prefix=["cleaned_data:"], index_type=IndexType.JSON
    )
)

'OK'

### Upload data to Redis and index it

In [71]:
for _, row in df.iterrows():
    row_id = row['key']
    row_dict = row.drop('key').to_dict()

    # Convert Timestamp values to string
    for key, value in row_dict.items():
        if isinstance(value, pd.Timestamp):
            row_dict[key] = value.strftime('%Y-%m-%d %H:%M:%S')
    
    r.set(row_id, mapping=row_dict)
    # rs.add_document(row_id, **row_dict)

    # r.execute_command('FT.ADD', index_name, row_id, 1.0, 'FIELDS', *sum(row_dict.items(), ()))

print("Data stored and indexed successfully.")

Unexpected exception formatting exception. Falling back to standard exception


  rs.add_document(row_id, **row_dict)
Traceback (most recent call last):
  File "e:\K2X\redis-local-store-data\venv\lib\site-packages\IPython\core\interactiveshell.py", line 3508, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\stepp\AppData\Local\Temp\ipykernel_14488\2875914499.py", line 11, in <module>
    rs.add_document(row_id, **row_dict)
  File "e:\K2X\redis-local-store-data\venv\lib\site-packages\redis\utils.py", line 108, in wrapper
  File "e:\K2X\redis-local-store-data\venv\lib\site-packages\redis\commands\search\commands.py", line 283, in add_document
  File "e:\K2X\redis-local-store-data\venv\lib\site-packages\redis\commands\search\commands.py", line 218, in _add_document
  File "e:\K2X\redis-local-store-data\venv\lib\site-packages\redis\client.py", line 1269, in execute_command
    ``asynchronous`` indicates whether the operation is
  File "e:\K2X\redis-local-store-data\venv\lib\site-packages\redis\retry.py", line 46, in call_with_retry
  

### Perform a search query

In [None]:
query = '@company_name:(GRASS DK ApS)'
result = r.execute_command('FT.SEARCH', 'idx:cleaned_data', query)

print('Search results:')
for doc_id in result[1::2]:
    doc = r.hgetall(doc_id)
    print(f'Document ID: {doc_id}, Data: {doc}')