In [2]:
!pip install python-dotenv
!pip install langchain-community
!pip install neo4j
from dotenv import load_dotenv
import os

from langchain_community.graphs import Neo4jGraph

# Warning control
import warnings
warnings.filterwarnings("ignore")

Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.1
Collecting langchain-community
  Downloading langchain_community-0.3.13-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain<0.4.0,>=0.3.13 (from langchain-community)
  Downloading langchain-0.3.13-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core<0.4.0,>=0.3.27 (from langchain-community)
  Downloading langchain_core-0.3.28-py3-none-any.whl.metadata (6.3 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.7.0-py3-none-any.w

In [3]:
# Load settings from the .env file stored under /content/sample_data.
load_dotenv('/content/sample_data/.env', override=True)

# Neo4j connection settings; each is None when the variable is not defined.
NEO4J_URI = os.getenv('NEO4J_URI')
NEO4J_USERNAME = os.getenv('NEO4J_USERNAME')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')
NEO4J_DATABASE = os.getenv('NEO4J_DATABASE')

# Read the OpenAI key from the environment (e.g. the same .env file) so the
# secret never has to be typed into the notebook. Falls back to the previous
# empty-string value when the variable is not set.
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', '')

# Embeddings endpoint handed to genai.vector.encode in the Cypher queries.
# Kept as a literal; the env-driven variant would be
# os.getenv('OPENAI_BASE_URL') + '/embeddings', which fails when the variable is unset.
OPENAI_ENDPOINT = 'https://api.openai.com/v1/embeddings'

In [4]:
# Connect with the settings loaded from the environment so that credentials
# (the password in particular) are not stored in the notebook. The previous
# literals remain as fallbacks when a variable is not defined.
kg = Neo4jGraph(
    url=NEO4J_URI or "neo4j+s://221b4037.databases.neo4j.io",
    username=NEO4J_USERNAME or "neo4j",
    password=NEO4J_PASSWORD or "",
    database=NEO4J_DATABASE or "neo4j",
)

In [5]:
# Vector index over Movie.taglineEmbedding: 1536 dimensions, cosine similarity.
# IF NOT EXISTS makes the statement safe to run again.
create_tagline_index_cypher = """
  CREATE VECTOR INDEX movie_tagline_embeddings IF NOT EXISTS
  FOR (m:Movie) ON (m.taglineEmbedding)
  OPTIONS { indexConfig: {
    `vector.dimensions`: 1536,
    `vector.similarity_function`: 'cosine'
  }}"""
kg.query(create_tagline_index_cypher)


[]

In [6]:
# For every Movie that has a tagline, encode the tagline through the OpenAI
# provider and store the resulting vector in the taglineEmbedding property.
embed_taglines_cypher = """
    MATCH (movie:Movie) WHERE movie.tagline IS NOT NULL
    WITH movie, genai.vector.encode(
        movie.tagline,
        "OpenAI",
        {
          token: $openAiApiKey,
          endpoint: $openAiEndpoint
        }) AS vector
    CALL db.create.setNodeVectorProperty(movie, "taglineEmbedding", vector)
    """
# Key and endpoint travel as query parameters rather than being spliced into the Cypher text.
openai_params = {
    "openAiApiKey": OPENAI_API_KEY,
    "openAiEndpoint": OPENAI_ENDPOINT,
}
kg.query(embed_taglines_cypher, params=openai_params)

[]

In [7]:
# List the vector indexes so the state of the tagline index can be inspected.
show_indexes_cypher = """
  SHOW VECTOR INDEXES
  """
kg.query(show_indexes_cypher)

[{'id': 6,
  'name': 'movie_tagline_embeddings',
  'state': 'ONLINE',
  'populationPercent': 100.0,
  'type': 'VECTOR',
  'entityType': 'NODE',
  'labelsOrTypes': ['Movie'],
  'properties': ['taglineEmbedding'],
  'indexProvider': 'vector-2.0',
  'owningConstraint': None,
  'lastRead': neo4j.time.DateTime(2024, 12, 22, 19, 17, 1, 967000000, tzinfo=<UTC>),
  'readCount': 1}]

In [8]:
# Natural-language question that is embedded and matched against the movie taglines.
question = 'what movies are about sci-fi ?'

In [9]:
# Embed the question with the same provider used for the taglines, then ask
# the movie_tagline_embeddings index for the $top_k closest Movie nodes.
similarity_search_cypher = """
    WITH genai.vector.encode(
        $question,
        "OpenAI",
        {
          token: $openAiApiKey,
          endpoint: $openAiEndpoint }) AS question_embedding
    CALL db.index.vector.queryNodes(
        'movie_tagline_embeddings',
        $top_k,
        question_embedding
        ) YIELD node AS movie, score
    RETURN movie.title, movie.tagline, score
    """
search_params = {
    "openAiApiKey": OPENAI_API_KEY,
    "openAiEndpoint": OPENAI_ENDPOINT,
    "question": question,
    "top_k": 5,  # number of nearest neighbours to return
}
kg.query(similarity_search_cypher, params=search_params)



[]

In [11]:
import pandas as pd
import numpy as np
import seaborn as sns                       #visualisation
import matplotlib.pyplot as plt             #visualisation
%matplotlib inline
sns.set(color_codes=True)


# Read the raw factory dataset into the working frame `df`.
factory_csv_path = "/content/Complex_Expanded_Factory_Data.csv"
df = pd.read_csv(factory_csv_path)


from os import replace
def _to_snake_identifier(column_name):
    """Turn a raw column header into a lowercase identifier.

    Spaces become underscores, '%' becomes 'pcnt', parentheses are dropped
    and '$' becomes 'dolrs'; the result is lower-cased.
    """
    cleaned = column_name.replace(' ', '_').replace('%', 'pcnt')
    cleaned = cleaned.replace('(', '').replace(')', '')
    return cleaned.replace('$', 'dolrs').lower()


# Map every original header to its normalised form, then apply the mapping.
col_rename_lst = {
    column_name: _to_snake_identifier(column_name) for column_name in df.columns
}

df = df.rename(columns=col_rename_lst)




factory                      366
date                         366
location                     366
machine_type                 366
machine_utilization_pcnt     366
machine_downtime_hours       366
maintenance_history          366
machine_age_years            366
batch_quality_pass_pcnt      366
cycle_time_minutes           366
energy_consumption_kwh       366
energy_efficiency_rating     366
co2_emissions_kg             366
emission_limit_compliance    366
waste_generated_kg           366
water_usage_liters           366
shift                        366
operator_experience_years    366
team_size                    366
operator_training_level      366
absenteeism_rate_pcnt        366
product_category             366
supplier                     366
supplier_delays_days         366
raw_material_quality         366
market_demand_index          366
cost_of_downtime_dolrs       366
revenue_dolrs                366
profit_margin_pcnt           366
breakdowns_count             366
safety_inc

In [None]:
# Export the factory / location columns as their own table.
factory_columns = ['factory', 'location']
df_f = df[factory_columns]
df_f.to_csv('factory.csv', index=False)

In [None]:
# Export the date column together with the row key
# "<factory>_<location>_<machine_type>_<date>".
# The key is built from `df` itself: the machine frame df_m is only created in
# a later cell, so depending on it breaks a top-to-bottom run of the notebook.
# .assign() returns a new frame instead of writing into a slice of `df`.
# NOTE(review): the name df_d is bound again, to the downtime table, in a later cell.
df_d = df[['date']].assign(
    key_point=df['factory'] + '_' + df['location'] + '_' + df['machine_type'] + '_' + df['date']
)
df_d.to_csv('date.csv', index=False)

In [12]:
# Machine-level columns exported to machine.csv.
machine_columns = [
    'factory', 'location', 'date', 'machine_type',
    'machine_utilization_pcnt', 'machine_downtime_hours', 'machine_age_years',
    'cycle_time_minutes', 'energy_consumption_kwh', 'energy_efficiency_rating',
    'co2_emissions_kg',
]

# factory_machine is the row key "<factory>_<location>_<machine_type>_<date>".
# .assign() builds a new frame, so no column is written into a slice of `df`
# (the pattern that triggers pandas' SettingWithCopyWarning).
df_m = df[machine_columns].assign(
    factory_machine=lambda m: m['factory'] + '_' + m['location'] + '_' + m['machine_type'] + '_' + m['date']
)
df_m.to_csv('machine.csv', index=False)



In [15]:
# Production-run metrics exported to production_run.csv.
production_columns = [
    'batch_quality_pass_pcnt', 'waste_generated_kg', 'water_usage_liters',
    'production_volume_units', 'defect_rate_pcnt', 'factory', 'date',
]

# key_point is the row key "<factory>_<location>_<machine_type>_<date>", built
# from the source frame so this cell does not depend on the machine frame.
# .assign() avoids writing a column into a slice of `df`.
df_pr = df[production_columns].assign(
    key_point=df['factory'] + '_' + df['location'] + '_' + df['machine_type'] + '_' + df['date']
)
df_pr.to_csv('production_run.csv', index=False)

In [16]:
# Row key "<factory>_<location>_<machine_type>_<date>" shared by both tables
# below; computed once from the source frame instead of from the machine frame.
shift_row_key = df['factory'] + '_' + df['location'] + '_' + df['machine_type'] + '_' + df['date']

# Shift table. .assign() returns a new frame rather than mutating a slice of `df`.
df_s = df[['shift']].assign(key_point=shift_row_key)
df_s.to_csv('shift.csv', index=False)

# Operator table.
operator_columns = ['operator_experience_years', 'operator_training_level', 'factory', 'date']
df_o = df[operator_columns].assign(key_point=shift_row_key)
df_o.to_csv('operator.csv', index=False)

In [17]:
# Team-size table with the row key "<factory>_<location>_<machine_type>_<date>".
# The key comes from the source frame, and .assign() builds a new frame
# instead of writing a column into a slice of `df`.
df_t = df[['team_size']].assign(
    key_point=df['factory'] + '_' + df['location'] + '_' + df['machine_type'] + '_' + df['date']
)
df_t.to_csv('team_size.csv', index=False)


In [18]:
# Product-category table with the row key "<factory>_<location>_<machine_type>_<date>".
# The key comes from the source frame, and .assign() builds a new frame
# instead of writing a column into a slice of `df`.
df_p = df[['product_category']].assign(
    key_point=df['factory'] + '_' + df['location'] + '_' + df['machine_type'] + '_' + df['date']
)
df_p.to_csv('product_category.csv', index=False)

In [19]:
# Row key "<factory>_<location>_<machine_type>_<date>" shared by both tables
# below; computed once from the source frame instead of from the machine frame.
supply_row_key = df['factory'] + '_' + df['location'] + '_' + df['machine_type'] + '_' + df['date']

# Supplier table. .assign() returns a new frame rather than mutating a slice of `df`.
supplier_columns = ['supplier', 'supplier_delays_days', 'raw_material_quality']
df_sup = df[supplier_columns].assign(key_point=supply_row_key)
df_sup.to_csv('supplier.csv', index=False)

# Market-demand table.
df_mk = df[['market_demand_index']].assign(key_point=supply_row_key)
df_mk.to_csv('market_demand.csv', index=False)

In [20]:
# Row key "<factory>_<location>_<machine_type>_<date>" shared by both tables
# below; computed once from the source frame instead of from the machine frame.
incident_row_key = df['factory'] + '_' + df['location'] + '_' + df['machine_type'] + '_' + df['date']

# Downtime table. .assign() returns a new frame rather than mutating a slice of `df`.
# NOTE(review): df_d is also the name used for the date table in an earlier
# cell; after this cell it refers to the downtime table.
df_d = df[['machine_downtime_hours']].assign(key_point=incident_row_key)
df_d.to_csv('downtime.csv', index=False)

# Safety-incident table.
df_si = df[['safety_incidents_count']].assign(key_point=incident_row_key)
df_si.to_csv('safety_incidents.csv', index=False)

In [None]:
# Create one Factory node per distinct (factory, location) pair found in the
# exported factory table.
# - The values now come from each row; the previous statement wrote the fixed
#   pair "Factory 1" / "City D" once per CSV line.
# - MERGE (instead of CREATE) keeps the cell safe to run more than once.
# - The rows are sent as a query parameter rather than through LOAD CSV:
#   LOAD CSV expects a URL, and a bare path on the notebook host is presumably
#   not reachable from the remote database server.
factory_rows = (
    pd.read_csv("/content/factory.csv")
    .drop_duplicates()
    .to_dict("records")
)
kg.query(
    """
    UNWIND $rows AS row
    MERGE (f:Factory {name: row.factory, location: row.location})
    """,
    params={"rows": factory_rows},
)
