In [5]:
from langchain_community.document_loaders import PyPDFLoader

In [51]:
loader = PyPDFLoader("../data/Evolution_of_AI.pdf")
data = loader.load()
data

[Document(metadata={'source': '../data/Evolution_of_AI.pdf', 'page': 0}, page_content='Jackson, Emerson Abraham\nWorking Paper\nThe Evolution of Artificial Intelligence: A Theoretical\nReview of its Impact on Teaching and Learning in the\nDigital Age\nSuggested Citation: Jackson, Emerson Abraham (2024) : The Evolution of Artificial Intelligence:\nA Theoretical Review of its Impact on Teaching and Learning in the Digital Age, ZBW – Leibniz\nInformation Centre for Economics, Kiel, Hamburg\nThis Version is available at:\nhttps://hdl.handle.net/10419/280893\nStandard-Nutzungsbedingungen:\nDie Dokumente auf EconStor dürfen zu eigenen wissenschaftlichen\nZwecken und zum Privatgebrauch gespeichert und kopiert werden.\nSie dürfen die Dokumente nicht für öffentliche oder kommerzielle\nZwecke vervielfältigen, öffentlich ausstellen, öffentlich zugänglich\nmachen, vertreiben oder anderweitig nutzen.\nSofern die Verfasser die Dokumente unter Open-Content-Lizenzen\n(insbesondere CC-Lizenzen) zur Ver

In [8]:
pages = loader.load_and_split()
pages = pages[1:]  # Skip the first few pages as they are not required
text = "\n".join([doc.page_content for doc in pages])

In [48]:
print(len(pages))
pages

14


[Document(metadata={'source': 'Evolution_of_AI.pdf', 'page': 1}, page_content="1 \n The Evolution of Artificial Intelligence: A Theoretical Review of its Impact on Teaching \nand Learning in the Digital Age  \nEmerson Abraham Jackson1 \nORCID : https://orcid.org/0000 -0002 -2802 -6152  \nAbstract : \nThis theoretical review explores the evolution of artificial intelligence (AI) and its impact on \nteaching and learning in the digital age. Investigating AI's integration into educational settings, the \npaper synthesises theoretical frameworks, empirical studies, and emerging trends. Drawing on  \nconstructivist, socio -cultural, and cognitive learning theories, the review analyses AI's \nimplications for educational practices. It traces the historical development of AI in education, \nhighlighting key milestones and the evolution of AI technologies. Th e paper adopts a theoretical \nframework to comprehensively analyse AI's impact, focusing on intelligent tutoring systems, \nadaptive le

In [10]:
text

"1 \n The Evolution of Artificial Intelligence: A Theoretical Review of its Impact on Teaching \nand Learning in the Digital Age  \nEmerson Abraham Jackson1 \nORCID : https://orcid.org/0000 -0002 -2802 -6152  \nAbstract : \nThis theoretical review explores the evolution of artificial intelligence (AI) and its impact on \nteaching and learning in the digital age. Investigating AI's integration into educational settings, the \npaper synthesises theoretical frameworks, empirical studies, and emerging trends. Drawing on  \nconstructivist, socio -cultural, and cognitive learning theories, the review analyses AI's \nimplications for educational practices. It traces the historical development of AI in education, \nhighlighting key milestones and the evolution of AI technologies. Th e paper adopts a theoretical \nframework to comprehensively analyse AI's impact, focusing on intelligent tutoring systems, \nadaptive learning platforms, virtual reality, natural language processing, and gamificati

In [11]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [15]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=150,
    length_function=len,
    is_separator_regex=False,
)

text_splitter

<langchain_text_splitters.character.RecursiveCharacterTextSplitter at 0x1e906baff20>

In [47]:
docs = text_splitter.create_documents([text])
print(len(docs))
docs[:5]

112


[Document(metadata={}, page_content="1 \n The Evolution of Artificial Intelligence: A Theoretical Review of its Impact on Teaching \nand Learning in the Digital Age  \nEmerson Abraham Jackson1 \nORCID : https://orcid.org/0000 -0002 -2802 -6152  \nAbstract : \nThis theoretical review explores the evolution of artificial intelligence (AI) and its impact on \nteaching and learning in the digital age. Investigating AI's integration into educational settings, the"),
 Document(metadata={}, page_content="teaching and learning in the digital age. Investigating AI's integration into educational settings, the \npaper synthesises theoretical frameworks, empirical studies, and emerging trends. Drawing on  \nconstructivist, socio -cultural, and cognitive learning theories, the review analyses AI's \nimplications for educational practices. It traces the historical development of AI in education, \nhighlighting key milestones and the evolution of AI technologies. Th e paper adopts a theoretical"),
 D

In [22]:
for i, d in enumerate(docs):
    d.metadata = {"doc_id": i}
docs[:5]

[Document(metadata={'doc_id': 0}, page_content="1 \n The Evolution of Artificial Intelligence: A Theoretical Review of its Impact on Teaching \nand Learning in the Digital Age  \nEmerson Abraham Jackson1 \nORCID : https://orcid.org/0000 -0002 -2802 -6152  \nAbstract : \nThis theoretical review explores the evolution of artificial intelligence (AI) and its impact on \nteaching and learning in the digital age. Investigating AI's integration into educational settings, the"),
 Document(metadata={'doc_id': 1}, page_content="teaching and learning in the digital age. Investigating AI's integration into educational settings, the \npaper synthesises theoretical frameworks, empirical studies, and emerging trends. Drawing on  \nconstructivist, socio -cultural, and cognitive learning theories, the review analyses AI's \nimplications for educational practices. It traces the historical development of AI in education, \nhighlighting key milestones and the evolution of AI technologies. Th e paper adop

In [24]:
import os
import google.generativeai as genai
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [52]:
from dotenv import load_dotenv
load_dotenv()
genai.configure(api_key = os.getenv('GEMINI_API'))

In [26]:
# This function takes a a sentence as an arugument and return it's embeddings
def get_embeddings(text):
    # Define the embedding model
    model = 'models/embedding-001'
    # Get the embeddings
    embedding = genai.embed_content(model=model,
                                    content=text,
                                    task_type="retrieval_document")
    return embedding['embedding']

In [45]:
# Get the page_content from the documents and create a new list
content_list = [doc.page_content for doc in docs]
content_list

["1 \n The Evolution of Artificial Intelligence: A Theoretical Review of its Impact on Teaching \nand Learning in the Digital Age  \nEmerson Abraham Jackson1 \nORCID : https://orcid.org/0000 -0002 -2802 -6152  \nAbstract : \nThis theoretical review explores the evolution of artificial intelligence (AI) and its impact on \nteaching and learning in the digital age. Investigating AI's integration into educational settings, the",
 "teaching and learning in the digital age. Investigating AI's integration into educational settings, the \npaper synthesises theoretical frameworks, empirical studies, and emerging trends. Drawing on  \nconstructivist, socio -cultural, and cognitive learning theories, the review analyses AI's \nimplications for educational practices. It traces the historical development of AI in education, \nhighlighting key milestones and the evolution of AI technologies. Th e paper adopts a theoretical",
 "highlighting key milestones and the evolution of AI technologies. Th e p

In [28]:
# Send one page_content at a time
embeddings = [get_embeddings(content) for content in content_list]

In [30]:
print(embeddings)
print(len(embeddings))
print(len(embeddings[0]))

[[0.03381605, -0.067073986, -0.018313628, 0.004443857, 0.06432152, 0.02121541, 0.04868734, -0.02022291, -0.033976138, 0.079644784, 0.032787375, 0.020830432, -0.006443928, 0.010829236, 0.013417124, -0.043587875, 0.015876599, 0.04263375, -0.014048936, -0.00033088517, 0.035859447, -0.010523758, 0.034210518, -0.0036034284, -0.0046845474, 0.01425017, -0.014290354, -0.040507145, -0.018343894, -0.0022431354, -0.0564273, 0.0028200615, -0.041268844, 0.019779678, 0.023590412, -0.07565729, -0.0020278366, 0.016105114, 0.015262228, 0.017410636, -0.017242748, -0.06245622, -0.036456864, 0.011011172, -0.0032064973, -0.08035491, 0.010433875, 0.08178623, 0.04289859, -0.038120147, 0.045538165, 0.02024289, 0.07625005, -0.033698577, 0.046348218, -0.03382236, 0.05324121, 0.04835605, -0.020809043, -0.0073235314, -0.048658308, 0.015253614, -0.013577656, 0.019012688, 0.015360089, -0.055655412, -0.06034593, 0.054060012, 0.038833104, -0.029928656, 0.029497355, -0.010444321, 0.053702515, -0.008504341, 0.004168038

In [32]:
# Create a dataframe to ingest it to the database
dataframe = pd.DataFrame({
    'page_content': content_list,
    'embeddings': embeddings
})
dataframe

Unnamed: 0,page_content,embeddings
0,1 \n The Evolution of Artificial Intelligence:...,"[0.03381605, -0.067073986, -0.018313628, 0.004..."
1,teaching and learning in the digital age. Inve...,"[0.013015941, -0.042444143, -0.05402537, 0.020..."
2,highlighting key milestones and the evolution ...,"[0.01342403, -0.04686725, -0.042393874, 0.0042..."
3,"interaction, and cognitive load management. Th...","[0.013649318, -0.030104425, -0.0481533, -0.011..."
4,"and pedagogy, acknowledging the dynamic interp...","[0.032317717, -0.036998406, -0.019311722, 0.01..."
...,...,...
107,"Johnson, L., Adams Becker, S., Cummins, M., Es...","[0.031409144, -0.017295081, -0.061099924, -0.0..."
108,"Science to the Classroom. In K. Sawyer (Ed.), ...","[0.01576563, -0.036156785, -0.030314859, -0.01..."
109,Argument for AI in Education. Pearson. \nMaye...,"[0.02694593, -0.04740985, -0.0376742, -0.02985..."
110,"Siemens, G., & Long, P. (2011). Penetrating th...","[0.07055509, -0.02423263, -0.039817784, -0.006..."


In [36]:
import chromadb
chroma_client = chromadb.Client()

In [34]:
collection = chroma_client.create_collection(name="my_collection")

In [None]:
# preparing to be added in chromadb

documents = []
metadata = []
ids = []

for chunk in chunks:
    documents.append(chunk.page_content)
    ids.append("ID"+str(i))
    metadata.append(chunk.metadata)

    i += 1

In [37]:
# Add each chunk with its embedding to the collection
for i, row in dataframe.iterrows():
    collection.add(
        documents=[row['page_content']],
        ids=[f"doc_{i}"],
        embeddings=[row['embeddings']],
        metadatas=[{"doc_id": i}]
    )

In [38]:
collection

Collection(id=b9652d27-34d6-406d-91c8-308c6e9de14b, name=my_collection)