In [7]:
pip install numpy pandas scikit-learn yfinance


Note: you may need to restart the kernel to use updated packages.


In [8]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Fetch stock data from Yahoo Finance
def get_stock_data(ticker, period='1y', interval='1d'):
    # Download the stock price data
    data = yf.download(ticker, period=period, interval=interval)
    data['Date'] = data.index
    return data

# Prepare the dataset for training the model
def prepare_data(df, feature_columns, target_column, test_size=0.2):
    X = df[feature_columns]
    y = df[target_column]

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)

    return X_train, X_test, y_train, y_test

# Define the ticker symbol for an Indian stock and the time period for the data
ticker = 'RELIANCE.NS'  # Example for Reliance Industries Limited
period = '1y'
interval = '1d'

# Fetch the stock data
stock_data = get_stock_data(ticker, period, interval)

# Feature engineering: Use only 'Open', 'High', 'Low', 'Close' for simplicity
feature_columns = ['Open', 'High', 'Low', 'Close']
target_column = 'Close'

# Prepare the data
X_train, X_test, y_train, y_test = prepare_data(stock_data, feature_columns, target_column)

# Create and train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the test data
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# Predict future prices
# future_prices = model.predict(future_data)


[*********************100%***********************]  1 of 1 completed
Mean Squared Error: 2.0679515313825692e-25


In [9]:
pip install numpy pandas scikit-learn yfinance


Note: you may need to restart the kernel to use updated packages.


In [10]:
import yfinance as yf
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
import pandas as pd

# Function to download stock data
def get_stock_data(ticker, start_date, end_date):
    data = yf.download(ticker, start=start_date, end=end_date)
    data.reset_index(inplace=True)
    return data

# Function to prepare features and target variables
def feature_target_split(data, target_column, n_lag_days=5):
    # Create lag features
    for i in range(1, n_lag_days + 1):
        data[f'lag_{i}'] = data[target_column].shift(i)
    data = data.dropna()
    X = data[[f'lag_{i}' for i in range(1, n_lag_days + 1)]]
    y = data[target_column]
    return X, y

# Function to create and evaluate a model
def create_and_evaluate_model(X_train, X_test, y_train, y_test):
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    
    mse = mean_squared_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    print(f"Mean Squared Error: {mse}")
    print(f"R^2 Score: {r2}")
    return model

# Get data for Nifty and Sensex
nifty_data = get_stock_data("^NSEI", "2020-01-01", "2023-01-01")
sensex_data = get_stock_data("^BSESN", "2020-01-01", "2023-01-01")

# Get data for Tata Motors
tata_motors_data = get_stock_data("TATAMOTORS.NS", "2020-01-01", "2023-01-01")

# Prepare the data
nifty_X, nifty_y = feature_target_split(nifty_data, 'Close')
sensex_X, sensex_y = feature_target_split(sensex_data, 'Close')
tata_motors_X, tata_motors_y = feature_target_split(tata_motors_data, 'Close')

# Split the data into training and testing datasets
nifty_X_train, nifty_X_test, nifty_y_train, nifty_y_test = train_test_split(nifty_X, nifty_y, test_size=0.2, random_state=42)
sensex_X_train, sensex_X_test, sensex_y_train, sensex_y_test = train_test_split(sensex_X, sensex_y, test_size=0.2, random_state=42)
tata_motors_X_train, tata_motors_X_test, tata_motors_y_train, tata_motors_y_test = train_test_split(tata_motors_X, tata_motors_y, test_size=0.2, random_state=42)

# Create and evaluate models
nifty_model = create_and_evaluate_model(nifty_X_train, nifty_X_test, nifty_y_train, nifty_y_test)
sensex_model = create_and_evaluate_model(sensex_X_train, sensex_X_test, sensex_y_train, sensex_y_test)
tata_motors_model = create_and_evaluate_model(tata_motors_X_train, tata_motors_X_test, tata_motors_y_train, tata_motors_y_test)

# Make future predictions (Example for Tata Motors)
# future_data = ...
# tata_motors_predictions = tata_motors_model.predict(future_data)


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
Mean Squared Error: 45042.60543141834
R^2 Score: 0.9949956830101939
Mean Squared Error: 383212.80687513034
R^2 Score: 0.9960346032469262
Mean Squared Error: 74.74217867558829
R^2 Score: 0.9963794414342559


In [13]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Download historical data for a stock
ticker = 'AAPL'  # You can change this to any stock of your choice
data = yf.download(ticker, start='2020-01-01', end='2023-01-01')

# Feature Engineering: Calculate daily returns and other technical indicators
data['Return'] = data['Close'].pct_change()
data['MA10'] = data['Close'].rolling(window=10).mean()
data['MA50'] = data['Close'].rolling(window=50).mean()
data['Volume_Change'] = data['Volume'].pct_change()
data['Volatility'] = data['Return'].rolling(window=10).std()

# Target Variable: Whether the stock will go up (1) or down (0) the next day
data['Target'] = np.where(data['Return'].shift(-1) > 0, 1, 0)

# Drop rows with NaN values
data = data.dropna()

# Prepare features and target
features = ['Return', 'MA10', 'MA50', 'Volume_Change', 'Volatility']
X = data[features]
y = data['Target']

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize and train the Random Forest classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Example: Making a trade decision for the next day
# If the prediction is 1, we buy; if it's 0, we sell/short
today_features = np.array([[
    data.iloc[-1]['Return'],
    data.iloc[-1]['MA10'],
    data.iloc[-1]['MA50'],
    data.iloc[-1]['Volume_Change'],
    data.iloc[-1]['Volatility']
]])
trade_decision = model.predict(today_features)
print('Trade Decision for Tomorrow:', 'Buy' if trade_decision[0] == 1 else 'Sell')


[*********************100%***********************]  1 of 1 completed
Accuracy: 0.52
Trade Decision for Tomorrow: Sell




In [14]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Download historical data for Tata Steel
ticker = 'TATASTEEL.NS'  # Ticker symbol for Tata Steel on NSE
data = yf.download(ticker, start='2020-01-01', end='2023-01-01')

# Feature Engineering: Calculate daily returns and other technical indicators
data['Return'] = data['Close'].pct_change()
data['MA10'] = data['Close'].rolling(window=10).mean()
data['MA50'] = data['Close'].rolling(window=50).mean()
data['Volume_Change'] = data['Volume'].pct_change()
data['Volatility'] = data['Return'].rolling(window=10).std()

# Target Variable: Whether the stock will go up (1) or down (0) the next day
data['Target'] = np.where(data['Return'].shift(-1) > 0, 1, 0)

# Drop rows with NaN values
data = data.dropna()

# Prepare features and target
features = ['Return', 'MA10', 'MA50', 'Volume_Change', 'Volatility']
X = data[features]
y = data['Target']

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize and train the Random Forest classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Example: Making a trade decision for the next day
# If the prediction is 1, we buy; if it's 0, we sell/short
today_features = np.array([[
    data.iloc[-1]['Return'],
    data.iloc[-1]['MA10'],
    data.iloc[-1]['MA50'],
    data.iloc[-1]['Volume_Change'],
    data.iloc[-1]['Volatility']
]])
trade_decision = model.predict(today_features)
print('Trade Decision for Tomorrow:', 'Buy' if trade_decision[0] == 1 else 'Sell')


[*********************100%***********************]  1 of 1 completed
Accuracy: 0.54
Trade Decision for Tomorrow: Sell




In [15]:
pip install numpy pandas scikit-learn yfinance


Note: you may need to restart the kernel to use updated packages.


In [16]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

# Fetch historical data for a stock (using Tata Steel as an example)
ticker = 'TATASTEEL.NS'
data = yf.download(ticker, start='2018-01-01', end='2023-01-01')

# Feature Engineering
# Calculate technical indicators like moving averages, RSI, etc.
data['SMA_20'] = data['Close'].rolling(window=20).mean()
data['SMA_50'] = data['Close'].rolling(window=50).mean()
data['Volatility'] = data['Close'].rolling(window=20).std()

# Shift the closing price forward to predict future prices
forecast_days = 5
data['Future_Close'] = data['Close'].shift(-forecast_days)

# Drop NaN values created by rolling windows and shifting
data.dropna(inplace=True)

# Features and target
X = data[['SMA_20', 'SMA_50', 'Volatility', 'Volume']]
y = data['Future_Close']

# Scaling features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Model initialization and training
model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
model.fit(X_train, y_train)

# Cross-validation
cv_scores = cross_val_score(model, X_scaled, y, cv=5)

# Model evaluation
predictions = model.predict(X_test)
mse = mean_squared_error(y_test, predictions)
print(f"Mean Squared Error: {mse}")
print(f"Cross-validated scores: {cv_scores}")

# Making predictions
# The output can be used for investment decisions, risk management, or further analysis
predicted_prices = model.predict(X_test)


[*********************100%***********************]  1 of 1 completed
Mean Squared Error: 14.359597352112505
Cross-validated scores: [-0.69409989  0.6135132   0.60856603  0.60292092  0.14462653]


In [18]:
import numpy as np 
import pandas as pd 
import yfinance as yf 
from sklearn.ensemble import GradientBoostingRegressor 
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error 
from sklearn.preprocessing import StandardScaler 

ticker = 'TATASTEEL.NS'
data = yf.download(ticker, start='2018-01-01', end='2023-01-01')
data['SMA_20'] = data['Close'].rolling(window=20).mean()
data['SMA_50'] = data['Close'].rolling(window=50).mean()
data['Volatility'] = data['Close'].rolling(window=20).std()
forecast_days = 5 
data['Future_Close'] = data['Close'].shift(-forecast_days)
data.dropna(inplace=True)
X = data[['SMA_20', 'SMA_50', 'Volatility', 'Volume']]
y = data['Future_Close']
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
model.fit(X_train, y_train)
# Cross-validation
cv_scores = cross_val_score(model, X_scaled, y, cv=5)

# Model evaluation
predictions = model.predict(X_test)
mse = mean_squared_error(y_test, predictions)
print(f"Mean Squared Error: {mse}")
print(f"Cross-validated scores: {cv_scores}")

# Making predictions
# The output can be used for investment decisions, risk management, or further analysis
predicted_prices = model.predict(X_test)


[*********************100%***********************]  1 of 1 completed
Mean Squared Error: 14.359597352112505
Cross-validated scores: [-0.69409989  0.6135132   0.60856603  0.60292092  0.14462653]


In [19]:
pip install transformers


Note: you may need to restart the kernel to use updated packages.


In [20]:
from transformers import pipeline, set_seed

def generate_prompt(prompt_text, model_name='gpt2', max_length=50, temperature=0.7):
    """
    Generate text based on a prompt using a pre-trained model.

    :param prompt_text: The prompt text to feed into the model.
    :param model_name: The pre-trained model to use.
    :param max_length: The maximum length of the generated sequence.
    :param temperature: The sampling temperature to use.
    :return: The generated text.
    """

    generator = pipeline('text-generation', model=model_name)
    set_seed(42)
    generated_text = generator(prompt_text, max_length=max_length, temperature=temperature)[0]['generated_text']
    
    return generated_text

# Example usage
prompt_description = "Translate the following sentences from English to French."
prompt_examples = [
    "Hello, how are you? - Bonjour, comment ça va?",
    "What is your name? - Comment vous appelez-vous?"
]

# Create the prompt
prompt_text = f"{prompt_description}\n"
for example in prompt_examples:
    prompt_text += f"{example}\n"

# Generate the response
response = generate_prompt(prompt_text)
print(response)


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Translate the following sentences from English to French.
Hello, how are you? - Bonjour, comment ça va?
What is your name? - Comment vous appelez-vous?
I am Bonjour


In [21]:
from transformers import MarianMTModel, MarianTokenizer

def translate(text, src_language="en", target_language="fr"):
    """
    Translate text from a source language to a target language using the Helsinki-NLP models.
    
    :param text: The text to translate.
    :param src_language: The source language (ISO 639-1 code).
    :param target_language: The target language (ISO 639-1 code).
    :return: The translated text.
    """
    
    # Define the model repository path
    model_name = f'Helsinki-NLP/opus-mt-{src_language}-{target_language}'

    # Initialize the tokenizer and model
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)

    # Tokenize the text
    translated = model.generate(**tokenizer(text, return_tensors="pt", padding=True))

    # Decode the tokenized text
    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)

    return translated_text

# Example usage
source_text = "Hello, how are you?"
# Translate from English to French
translated_text = translate(source_text, "en", "fr")
print(f"Translated text: {translated_text}")

# To translate to other languages, you just need to change the target_language parameter
# to the ISO 639-1 code of the target language. For example, for Spanish it would be "es", for German "de", etc.


ImportError: 
MarianTokenizer requires the SentencePiece library but it was not found in your environment. Checkout the instructions on the
installation page of its repo: https://github.com/google/sentencepiece#installation and follow the ones
that match your environment. Please note that you may need to restart your runtime after installation.


In [22]:
pip install sentencepiece


Collecting sentencepiece
  Downloading sentencepiece-0.1.99-cp39-cp39-win_amd64.whl (977 kB)
     -------------------------------------- 977.6/977.6 kB 6.2 MB/s eta 0:00:00
Installing collected packages: sentencepiece
Successfully installed sentencepiece-0.1.99
Note: you may need to restart the kernel to use updated packages.


In [2]:
from transformers import MarianMTModel, MarianTokenizer

def translate(text, src_language="en", target_language="fr"):
    """
    Translate text from a source language to a target language using the Helsinki-NLP models.
    
    :param text: The text to translate.
    :param src_language: The source language (ISO 639-1 code).
    :param target_language: The target language (ISO 639-1 code).
    :return: The translated text.
    """
    
    # Define the model repository path
    model_name = f'Helsinki-NLP/opus-mt-{src_language}-{target_language}'

    # Initialize the tokenizer and model
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)

    # Tokenize the text
    translated = model.generate(**tokenizer(text, return_tensors="pt", padding=True))

    # Decode the tokenized text
    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)

    return translated_text

# Example usage
source_text = "Hello, how are you?"
# Translate from English to French
translated_text = translate(source_text, "en", "fr")
print(f"Translated text: {translated_text}")

# To translate to other languages, you just need to change the target_language parameter
# to the ISO 639-1 code of the target language. For example, for Spanish it would be "es", for German "de", etc.




Translated text: Bonjour, comment allez-vous?


In [1]:
from transformers import MarianMTModel, MarianTokenizer

def translate(text, src_language="en", target_language="fr"):
    """
    Translate text from a source language to a target language using the Helsinki-NLP models.
    
    :param text: The text to translate.
    :param src_language: The source language (ISO 639-1 code).
    :param target_language: The target language (ISO 639-1 code).
    :return: The translated text.
    """
    
    # Define the model repository path
    model_name = f'Helsinki-NLP/opus-mt-{src_language}-{target_language}'

    # Initialize the tokenizer and model
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)

    # Tokenize the text
    translated = model.generate(**tokenizer(text, return_tensors="pt", padding=True))

    # Decode the tokenized text
    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)

    return translated_text

# Example usage
source_text = "Hello, how are you?"
# Translate from English to French
translated_text = translate(source_text, "en", "fr")
print(f"Translated text: {translated_text}")


Downloading (…)okenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading (…)olve/main/source.spm:   0%|          | 0.00/778k [00:00<?, ?B/s]

Downloading (…)olve/main/target.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.34M [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.42k [00:00<?, ?B/s]



Downloading pytorch_model.bin:   0%|          | 0.00/301M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

Translated text: Bonjour, comment allez-vous?
