In [1]:
import pandas as pd

In [16]:
with open('Google_cocall.txt', 'r', encoding = 'utf-8') as f:
    input = f.read()

print((input))

Alphabet Inc. (NASDAQ:GOOG) Q3 2023 Earnings Call Transcript October 24, 2023 4:30 PM ET

Company Participants

Jim Friedland - Director of Investor Relations

Sundar Pichai - Chief Executive Officer

Philipp Schindler - Chief Business Officer

Ruth Porat - Chief Financial Officer

Conference Call Participants

Brian Nowak - Morgan Stanley

Doug Anmuth - JPMorgan

Eric Sheridan - Goldman Sachs

Lloyd Walmsley - UBS

Michael Nathanson - MoffettNathanson

Justin Post - Bank of America

Ken Gawrelski - Wells Fargo

Mark Mahaney - Evercore

Operator

Welcome, everyone. Thank you for standing by for the Alphabet Third Quarter 2023 Earnings Conference Call. At this time, all participants are in listen-only mode. After the speaker presentation, there will be a question-and-answer session. [Operator Instructions] I would now like to hand the conference over to your speaker today, Jim Friedland, Director of Investor Relations. Please go ahead.

Jim Friedland

Thank you. Good afternoon, everyone

In [20]:
input

"Alphabet Inc. (NASDAQ:GOOG) Q3 2023 Earnings Call Transcript October 24, 2023 4:30 PM ET\n\nCompany Participants\n\nJim Friedland - Director of Investor Relations\n\nSundar Pichai - Chief Executive Officer\n\nPhilipp Schindler - Chief Business Officer\n\nRuth Porat - Chief Financial Officer\n\nConference Call Participants\n\nBrian Nowak - Morgan Stanley\n\nDoug Anmuth - JPMorgan\n\nEric Sheridan - Goldman Sachs\n\nLloyd Walmsley - UBS\n\nMichael Nathanson - MoffettNathanson\n\nJustin Post - Bank of America\n\nKen Gawrelski - Wells Fargo\n\nMark Mahaney - Evercore\n\nOperator\n\nWelcome, everyone. Thank you for standing by for the Alphabet Third Quarter 2023 Earnings Conference Call. At this time, all participants are in listen-only mode. After the speaker presentation, there will be a question-and-answer session. [Operator Instructions] I would now like to hand the conference over to your speaker today, Jim Friedland, Director of Investor Relations. Please go ahead.\n\nJim Friedland\n

In [26]:
chars = sorted(list(set(input)))
vocab_size = len(chars)
print(' '.join(chars))
print(vocab_size)


   $ % & ' ( ) , - . / 0 1 2 3 4 5 6 7 8 9 : ? A B C D E F G H I J K L M N O P Q R S T U V W Y [ ] a b c d e f g h i j k l m n o p q r s t u v w x y z ’
77


In [28]:
stoi = {ch:i for i,ch in enumerate(chars)}
itos = {i:ch for i,ch in enumerate(chars)}

encode = lambda s : [ stoi[c] for c in s]
decode = lambda i : [ ''.join(itos[i] for i in i)]

In [35]:
print(encode("Hii"))
print(decode([31, 58, 58]))

[31, 58, 58]
['Hii']


### Creating tensors for the text using Torch

In [39]:
import torch

data = torch.tensor(encode(input), dtype= torch.long)
print(data.shape, data.type)
print(data[:100])

torch.Size([46962]) <built-in method type of Tensor object at 0x1343cd7f0>
tensor([24, 61, 65, 57, 50, 51, 54, 69,  1, 32, 63, 52, 10,  1,  6, 37, 24, 42,
        27, 24, 40, 22, 30, 38, 38, 30,  7,  1, 40, 15,  1, 14, 12, 14, 15,  1,
        28, 50, 67, 63, 58, 63, 56, 68,  1, 26, 50, 61, 61,  1, 43, 67, 50, 63,
        68, 52, 67, 58, 65, 69,  1, 38, 52, 69, 64, 51, 54, 67,  1, 14, 16,  8,
         1, 14, 12, 14, 15,  1, 16, 22, 15, 12,  1, 39, 36,  1, 28, 43,  0,  0,
        26, 64, 62, 65, 50, 63, 74,  1, 39, 50])


### Splitting dataset into train and Validationj

In [43]:
n = int(len(input)*0.9)
train = data[:n]
val = data[n:]

## Creating a blocksize context and target data set
### When we give a context the model has to predict the target
### The input output model is a sequential model
### a->b, a,b ->c, a,b,c -> d

In [44]:
block_size = 8 ## taking block size of 8
train[:block_size+1]

tensor([24, 61, 65, 57, 50, 51, 54, 69,  1])

In [47]:
x = train[:block_size]
y = train[1:block_size+1]

for i in range(block_size):
    context = x[:i+1]
    target = y[i]

    print(f'When context is {context} then {target}')



When context is tensor([24]) then 61
When context is tensor([24, 61]) then 65
When context is tensor([24, 61, 65]) then 57
When context is tensor([24, 61, 65, 57]) then 50
When context is tensor([24, 61, 65, 57, 50]) then 51
When context is tensor([24, 61, 65, 57, 50, 51]) then 54
When context is tensor([24, 61, 65, 57, 50, 51, 54]) then 69
When context is tensor([24, 61, 65, 57, 50, 51, 54, 69]) then 1


### Creating batches of the input(context) and output(target) data
#### We want to make use of teh parallel processing computing power of GPUs
#### We will create the single stack of the block sized data into multiple chunks of batch size data
#### Lets say we have 96 we will split them first into block size data of 8(block size)*12
#### now we will split the data onto chunks of batches of size lets say 4 
#### This becomes three batches of data => 8* 3, 8* 3, 8* 3 , 8* 3 (Total 4 bathces of data)
#### All these 4bathces are trained in parallel 

In [84]:
torch.manual_seed(42)

batch_size = 4 ## No.of small independent sets for parallel training
block_size = 8 ## Maximum context length for predictions 

def getbatch(split):
    data = train if split == "train" else val

    ix = torch.randint(len(data) - block_size, (batch_size,))
    x = torch.stack([data[i:i+block_size] for i in ix])
    y = torch.stack([data[i+1:i+block_size+1] for i in ix])

    return x, y


xb, yb = getbatch("train")    
    
print(xb)
print(yb)

for b in range(batch_size):
    for t in range(block_size):
        context = xb[b,:t+1]
        target = yb[b,t]
        print(f"when context is {context.tolist()} then {target}")

tensor([[70, 68, 61, 74,  8,  1, 69, 57],
        [43, 57, 50, 63, 60,  1, 74, 64],
        [ 1, 69, 57, 58, 63, 56,  1, 51],
        [69, 64,  1, 54, 73, 54, 52, 70]])
tensor([[68, 61, 74,  8,  1, 69, 57, 58],
        [57, 50, 63, 60,  1, 74, 64, 70],
        [69, 57, 58, 63, 56,  1, 51, 54],
        [64,  1, 54, 73, 54, 52, 70, 69]])
when context is [70] then 68
when context is [70, 68] then 61
when context is [70, 68, 61] then 74
when context is [70, 68, 61, 74] then 8
when context is [70, 68, 61, 74, 8] then 1
when context is [70, 68, 61, 74, 8, 1] then 69
when context is [70, 68, 61, 74, 8, 1, 69] then 57
when context is [70, 68, 61, 74, 8, 1, 69, 57] then 58
when context is [43] then 57
when context is [43, 57] then 50
when context is [43, 57, 50] then 63
when context is [43, 57, 50, 63] then 60
when context is [43, 57, 50, 63, 60] then 1
when context is [43, 57, 50, 63, 60, 1] then 74
when context is [43, 57, 50, 63, 60, 1, 74] then 64
when context is [43, 57, 50, 63, 60, 1, 74,

In [59]:
torch.randint(46954, (batch_size,))

tensor([ 3236, 19147, 25043,  1569])

In [63]:
len(train)

42265

### Creating a NN model using Bigramlanguagemodel(BLM)
#### We pass inputs xb , yb to the BLM 
#### Get the embeddings for the inputs using the vocab size 
#### Apply embedding table on the inputs xb
#### Transform the shapes of input and output according to CrossEntropyLoss

In [79]:
import torch
import torch.nn as nn
from torch.nn import functional as F 
torch.manual_seed(42)

class BigramLanguagemodel(nn.Module):

    def __init__(self, vocab_size):
        super().__init__()
        self.token_embedding_table = nn.Embedding(vocab_size, vocab_size)## Gives the embeddings on the inputs

    def forward(self, idx, targets=None):
        logits = self.token_embedding_table(idx) ## (B batchsize, T block size , C Channels)
        if targets is None:
            loss = None
        else :
            B,T,C = logits.shape
            logits = logits.view(B*T, C)## Converting the shapes of the input to match loss function
            targets = targets.view(B*T)
            loss = F.cross_entropy(logits, targets)

        return logits, loss
    
    def generate(self, idx, max_new_tokens):
        for _ in range(max_new_tokens):
                
            # idx is (B batchsize, T block size) in the current context
            logits, loss = self(idx)
            #focus only on the last time step
            logits = logits[:,-1,:] # Becomes B C
            # Apply softmax to get probabilities
            probs = F.softmax(logits, dim =1)#(B C)
            #Sample from the distribution
            idx_next = torch.multinomial(probs, num_samples=1) # B =1
            # Append sample index to the running equence
            idx = torch.cat((idx, idx_next), dim =1) # B* T+1
        return idx


m = BigramLanguagemodel(vocab_size)
logits, loss = m(xb, yb)

print(loss.shape, logits.shape)
print(loss)


print(decode(m.generate(idx = torch.zeros((1,1), dtype = torch.long), max_new_tokens=100)[0].tolist()))

torch.Size([]) torch.Size([32, 77])
tensor(4.8042, grad_fn=<NllLossBackward0>)
['\nDkx7kaC/W.6efJxo’P6vm3 nye3 BQ:82DysaTpm:&d:ggrl6’ma?ma7jPUpi&7t-JV4a:2Ad4Hs\n uHGYKFLaLhjfUpVi/p9E?-']


In [80]:
# Crearte Pytorch optimizer
optimizer = torch.optim.Adam(m.parameters(), lr = 1e-3)

In [87]:
batch_size = 32
for steps in range(20000):
    # Sample of a batch of data
    xb, yb = getbatch('train')
    # evaluate the loss 
    logits, loss = m(xb, yb)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()
print(loss.item())

2.3883168697357178


In [89]:
context = torch.zeros((1,1), dtype = torch.long)
print(decode(m.generate(context, max_new_tokens=400)[0].tolist()))

["\n\n\nAInen, permoitin Yomeve od bevear tind nond w cro to ct herklipre w ivewhesince Phiog.3 candinds bechingee ar arolse aco Cllitiod. t re wa quierores?\n\nAngre vern. to od th hasitilis Youbuglutorutstu pe we tile Picomickiout ompomerompalend ally. t'stedowill n TuTerng tiveds s, inuroberl atin we ti\n\n\nIntrnipus, u cu. Andreme woges a d ie or. r anutewatizenar be Gote th ian ullern th d ldued thefur"]


# Self Attention

In [91]:
## Example 
torch.manual_seed(42)
B,T,C = 4,8,2
x = torch.randn(B, T, C)
x.shape

torch.Size([4, 8, 2])

In [113]:
## Initialize weights 
head_size = 16
query = nn.Linear(C, head_size, bias = False)
key = nn.Linear(C, head_size, bias = False)

q = query(x)
k = key(x)
wei = q@k.transpose(-2, -1)* C**-0.5 # (B, T, 16) @ (B, 16, T) -> (B, T,T)

#wei = torch.zeros(T,T)
tril = torch.tril(torch.ones(T, T))
wei = wei.masked_fill(tril ==0, float('-inf'))
wei = F.softmax(wei, dim =-1)

value = nn.Linear(C, head_size, bias = False)
val = value(x)
out = wei@val

out.shape


torch.Size([4, 8, 16])

In [114]:
wei[0]

tensor([[1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0179, 0.9821, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0563, 0.5513, 0.3924, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0144, 0.4238, 0.2111, 0.3507, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.7077, 0.0314, 0.0516, 0.0267, 0.1825, 0.0000, 0.0000, 0.0000],
        [0.0151, 0.3397, 0.1674, 0.2456, 0.0173, 0.2149, 0.0000, 0.0000],
        [0.0614, 0.2745, 0.1769, 0.1867, 0.0369, 0.1607, 0.1029, 0.0000],
        [0.3476, 0.0994, 0.1114, 0.0764, 0.1226, 0.0695, 0.0740, 0.0990]],
       grad_fn=<SelectBackward0>)