## Install and import necessary libraries

In [17]:
# Dependency bootstrap: try to import each package and fall back to installing it
# into the kernel's environment with the IPython `%pip` magic.
# NOTE(review): the installs are unpinned, so a fresh environment may pull a
# different version than the one this notebook was written against.

# Check if langchain[llm] is installed
# Only the base `langchain` package is imported here; a successful import does not
# by itself verify that the dependencies of the `[llm]` extra are present.
try:
    import langchain
    print("langchain[llm] is already installed.")
except ImportError:
    print("langchain[llm] is not installed. Installing...")
    %pip install langchain[llm]

# Check if gpt4all is installed
# The import doubles as the availability probe; after a fresh install this cell
# does not retry the import.
try:
    from gpt4all import GPT4All
    print("gpt4all is already installed.")
except ImportError:
    print("gpt4all is not installed. Installing...")
    %pip install gpt4all

langchain[llm] is already installed.
gpt4all is already installed.


In [18]:
from gpt4all import GPT4All
import os
import requests
from tqdm.notebook import tqdm
from pathlib import Path
import platform

## Configuration Constants

In [19]:
# Direct-download URL for the model weights. Hugging Face serves raw file contents
# from /resolve/<revision>/...; the /blob/<revision>/... form is the HTML file-viewer
# page, so fetching it would save a web page instead of the model binary.
MODEL_URL = "https://huggingface.co/nomic-ai/gpt4all-falcon-ggml/resolve/main/ggml-model-gpt4all-falcon-q4_0.bin"
MODEL_DIR = "models"
MODEL_FILENAME = "ggml-model-gpt4all-falcon-q4_0.bin"
# Location of the model file; the leading ".." is resolved against the current
# working directory.
MODEL_PATH = Path("..") / MODEL_DIR / MODEL_FILENAME

# Name of the current operating system (kept available for later cells).
current_os = platform.system()

# String form of the model path. as_posix() always produces forward slashes, on
# Windows as well as on other platforms, so no per-OS branching is needed.
model_path_str = MODEL_PATH.as_posix()

In [20]:
# Emit a plain-text section heading into the cell output ahead of the download step.
print("Download Model")

def download_model():
    """
    Download the model file if it doesn't exist locally.

    Reads the module-level ``MODEL_URL`` (source) and ``MODEL_PATH`` (destination).
    The data is streamed into a temporary ``<name>.part`` file next to the
    destination and only renamed to ``MODEL_PATH`` once the transfer has finished,
    so an interrupted download is never mistaken for a complete model on the next
    run.

    Returns:
        None. Progress and the outcome are reported via ``print`` and a tqdm bar.

    Errors (network failures, HTTP error statuses, filesystem problems) are
    caught and reported as a message rather than raised, so the notebook keeps
    running; any partial file is removed first.
    """
    target_path = Path(MODEL_PATH)

    # Nothing to do when the model is already present.
    if target_path.exists():
        print("Model file already exists. Skipping download.")
        return

    partial_path = target_path.with_name(target_path.name + ".part")
    try:
        # Create the directory the file is actually written to (the parent of
        # MODEL_PATH), not a same-named directory under the working directory.
        target_path.parent.mkdir(parents=True, exist_ok=True)

        print(f"Downloading model from {MODEL_URL}...")
        # Stream the body in chunks; the context manager releases the connection.
        # The timeout bounds connection setup and each read, not the whole transfer.
        with requests.get(MODEL_URL, stream=True, timeout=30) as response:
            # Fail on 4xx/5xx instead of saving an error page as the model file.
            response.raise_for_status()
            total_size = int(response.headers.get('content-length', 0))
            block_size = 1024 * 1024  # 1 MiB chunks keep per-chunk overhead low

            # A missing content-length (0) becomes None so tqdm shows a plain counter.
            with open(partial_path, 'wb') as file, tqdm(
                total=total_size or None, unit='B', unit_scale=True, unit_divisor=1024
            ) as pbar:
                for data in response.iter_content(block_size):
                    file.write(data)
                    pbar.update(len(data))

        # Publish the finished file under its final name in a single step.
        os.replace(partial_path, target_path)
        print(f"Downloaded model to {MODEL_PATH}")
    except Exception as e:
        # Best-effort cleanup so a truncated file is not left behind.
        try:
            if partial_path.exists():
                partial_path.unlink()
        except OSError:
            pass
        print(f"An error occurred: {str(e)}")

# Run the download step now; download_model() itself checks whether the model
# file is already present and skips the transfer in that case.
download_model()


Download Model
Model file already exists. Skipping download.


## Importing Models in LangChain

### GPT4All models

In [21]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.llms import GPT4All
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

### Asking a Question

In [22]:
# Prompt skeleton: the caller's text is substituted for {question}, and the answer
# is primed with a step-by-step cue.
template = """Question: {question}

Answer: Let's think step by step."""

# Bind the skeleton to its single placeholder.
prompt = PromptTemplate(
    input_variables=["question"],
    template=template,
)

In [23]:
# Display the configured model path as a quick sanity check before loading the model.
MODEL_PATH

WindowsPath('../models/ggml-model-gpt4all-falcon-q4_0.bin')

In [24]:
# Stream generated tokens to stdout as they are produced.
callbacks = [StreamingStdOutCallbackHandler()]

# `GPT4All` here is the LangChain wrapper imported from langchain.llms, which
# shadows the class of the same name imported earlier from the gpt4all package.
# verbose=True is needed for the callbacks to be passed through to the callback manager.
llm = GPT4All(
    model=model_path_str,
    callbacks=callbacks,
    verbose=True,
)

Found model file at  ../models/ggml-model-gpt4all-falcon-q4_0.bin


In [25]:
# Combine the local model with the question prompt into one callable chain.
llm_chain = LLMChain(
    llm=llm,
    prompt=prompt,
)

In [26]:
# Run the chain on a sample question and keep the returned result in `output`;
# the answer text also appears in the cell output.
output = llm_chain("How to make milk tea?")



1. Boil water in a pot.
2. Add loose leaf tea (e.g. green tea, jasmine tea) into the pot.
3. Let it steep for 5-10 minutes.
4. Strain the tea into another pot.
5. Add milk and sweetener (e.g. sugar, honey) to taste.
6. Heat the mixture on low heat until it's warm.
7. Pour the tea into a cup and enjoy!

In [27]:
# Inspect which fields the chain result carries.
output.keys()

dict_keys(['question', 'text'])

## User-Defined Question

In [28]:
# Read the question into its own variable. Assigning the result to the name `input`
# would rebind the built-in to a string, so running this cell a second time in the
# same kernel would fail with "TypeError: 'str' object is not callable".
user_question = input("Ask me anything:")

# input() already returns a str, so the question is passed to the chain as-is.
output = llm_chain(user_question)

# The generated answer is stored under the 'text' key of the chain result.
answer = output['text']
print(answer)



1. Check for redundant features
2. Check for irrelevant features
3. Check for overlapping features
4. Check for redundant values
5. Check for irrelevant values
6. Check for overlapping values
7. Check for redundant values
8. Check for irrelevant values
9. Check for overlapping values
10. Check for redundant values
11. Check for irrelevant values
12. Check for overlapping values
13. Check for redundant values
14. Check for irrelevant values
15. Check for overlapping values
16. Check for redundant values
17. Check for irrelevant values
18. Check for overlapping values
19. Check for redundant values
20. Check for irrelevant values
21. Check for overlapping values
22. Check for redundant values
23. Check for irrelevant values
24. Check for overlapping values
25. Check for redundant values
26. Check for irrelevant values
27. Check for overlapping values
28. Check for redundant values
29. Check for irrelevant values
30. Check for overlapping values
31. Check for redundant values
32. Check 

In [29]:
# Echo the question back from the chain result, read from its 'question' key.
print(f"You asked: {output['question']}")

You asked: How to reduce no of features in a dataset?
