<a href="https://colab.research.google.com/github/entajari/PerSpellData/blob/main/QlearningProject.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install requests numpy pandas tensorflow



In [2]:
import requests
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

In [3]:
class QAgent:
    def __init__(self, state_size, action_size, learning_rate=0.001, discount_rate=0.99, epsilon=1.0, epsilon_decay=0.999, epsilon_min=0.01):
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = learning_rate
        self.discount_rate = discount_rate
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min
        self.model = self.build_model()

    def build_model(self):
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def choose_action(self, state):
        if np.random.rand() <= self.epsilon:
            return np.random.choice(self.action_size)
        q_values = self.model.predict(state)
        print(np.argmax(q_values[0]))
        return np.argmax(q_values[0])

    def update_epsilon(self):
        self.epsilon = max(self.epsilon * self.epsilon_decay, self.epsilon_min)

    def train(self, state, action, reward, next_state, done):
        target = reward
        if not done:
            target += self.discount_rate * np.amax(self.model.predict(next_state)[0])
        q_values = self.model.predict(state)
        q_values[0][action] = target
        self.model.fit(state, q_values, verbose=0)

In [6]:
def get_project_info(project_id):
    url = f"https://api.coingecko.com/api/v3/coins/{project_id}"
    response = requests.get(url)
    data = response.json()
    initial_coin = data.get('market_data', {}).get('total_supply', 0)
    initial_price = data.get('market_data', {}).get('current_price', {}).get('usd', 0)
    return initial_coin, initial_price

In [7]:
num_projects = 100  # Number of projects in the dataset
#project_ids = ['bitcoin', 'project2', 'project3']  # List of web3 project IDs

url_list_project = f"https://api.coingecko.com/api/v3/coins/list"
response_url = requests.get(url_list_project)
list_project = response_url.json()

project_ids = [list_project[i]['id'] for i in range(len(list_project))] #['bitcoin'] List of web3 project IDs
state_size = 3  # Number of state features (returns, initial coin, initial price)
action_size = 2  # Number of possible actions (vote for/against a project)

In [8]:
agent = QAgent(state_size, action_size)
columns = ['Returns', 'Initial Coin', 'Initial Price', 'Action', 'Reward', 'Next Returns', 'Next Initial Coin', 'Next Initial Price']
dataset = pd.DataFrame(columns=columns)
#dataset.head()

In [9]:
# from IPython.display import display
# import pandas as pd
# # Assuming you have a DataFrame named 'df'

# df = pd.DataFrame(dataset)
# display(df)

In [10]:
def update_dataset_and_train(state, action, reward, next_state):
    next_returns, next_initial_coin, next_initial_price = get_project_info(project_ids[action])
    next_state = np.array([next_returns, next_initial_coin, next_initial_price]).reshape(1, state_size)
    dataset.loc[len(dataset)] = [state[0][0], state[0][1], state[0][2], action, reward, next_returns, next_initial_coin, next_initial_price]
    agent.train(state, action, reward, next_state, False)
    return next_state

In [11]:
def evaluate_agent():
    total_rewards = 0
    state = np.array([0, 0, 0]).reshape(1, state_size)
    for _ in range(num_projects):
        action = agent.choose_action(state)
        next_state = update_dataset_and_train(state, action, 0, None)
        state = next_state
    for _ in range(num_projects):
        action = agent.choose_action(state)
        next_state, reward = get_next_state_and_reward(state, action)
        total_rewards += reward
        state = next_state
    return total_rewards / num_projects


In [12]:
def get_next_state_and_reward(state, action):
    next_returns, next_initial_coin, next_initial_price = get_project_info(project_ids[action])
    next_state = np.array([next_returns, next_initial_coin, next_initial_price]).reshape(1, state_size)
    reward = next_returns - state[0][0]
    return next_state, reward

In [13]:
def get_next_state_and_reward(state, action):
    next_returns, next_initial_coin, next_initial_price = get_project_info(project_ids[action])
    next_state = np.array([next_returns, next_initial_coin, next_initial_price]).reshape(1, state_size)
    reward = next_returns - state[0][0]
    return next_state, reward

In [14]:
for _ in range(1000):
    state = np.array([0, 0, 0]).reshape(1, state_size)
    for _ in range(num_projects):
        action = agent.choose_action(state)
        next_state, reward = get_next_state_and_reward(state, action)
        update_dataset_and_train(state, action, reward, next_state)
        state = next_state
    agent.update_epsilon()
    if _ % 100 == 0:
        average_reward = evaluate_agent()
        print(f"Episode: {_}, Average Reward: {average_reward}")

ValueError: ignored