Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issues with socket and reading url #60

Open
clarence-ong opened this issue Mar 14, 2024 · 1 comment
Open

Issues with socket and reading url #60

clarence-ong opened this issue Mar 14, 2024 · 1 comment
Labels
fix Bug fix.

Comments

@clarence-ong
Copy link

Describe the bug

I am getting two errors when calling the extract_train_data method of the SoccerDataLoader class:

gaierror: [Errno 11001] getaddrinfo failed
URLError

Steps or code to reproduce the bug

from sportsbet.datasets import SoccerDataLoader
dataloader = SoccerDataLoader(param_grid={'league': ['Italy'], 'year': [2020]})
X_train, Y_train, O_train = dataloader.extract_train_data(odds_type='market_maximum', drop_na_thres=1.0)
X_fix, Y_fix, O_fix = dataloader.extract_fixtures_data()

Expected Results

X_train and Y_train should be produced successfully.

Actual Results

In [6]: X_train, Y_train, O_train = dataloader.extract_train_data(odds_type='market_maximum', drop_na_thres=1.0)

gaierror Traceback (most recent call last)
File C:\Program Files\Python311\Lib\urllib\request.py:1348, in AbstractHTTPHandler.do_open(self, http_class, req, **http_conn_args)
1347 try:
-> 1348 h.request(req.get_method(), req.selector, req.data, headers,
1349 encode_chunked=req.has_header('Transfer-encoding'))
1350 except OSError as err: # timeout error

File C:\Program Files\Python311\Lib\http\client.py:1286, in HTTPConnection.request(self, method, url, body, headers, encode_chunked)
1285 """Send a complete request to the server."""
-> 1286 self._send_request(method, url, body, headers, encode_chunked)

File C:\Program Files\Python311\Lib\http\client.py:1332, in HTTPConnection._send_request(self, method, url, body, headers, encode_chunked)
1331 body = _encode(body, 'body')
-> 1332 self.endheaders(body, encode_chunked=encode_chunked)

File C:\Program Files\Python311\Lib\http\client.py:1281, in HTTPConnection.endheaders(self, message_body, encode_chunked)
1280 raise CannotSendHeader()
-> 1281 self._send_output(message_body, encode_chunked=encode_chunked)

File C:\Program Files\Python311\Lib\http\client.py:1041, in HTTPConnection._send_output(self, message_body, encode_chunked)
1040 del self._buffer[:]
-> 1041 self.send(msg)
1043 if message_body is not None:
1044
1045 # create a consistent interface to message_body

File C:\Program Files\Python311\Lib\http\client.py:979, in HTTPConnection.send(self, data)
978 if self.auto_open:
--> 979 self.connect()
980 else:

File C:\Program Files\Python311\Lib\http\client.py:1451, in HTTPSConnection.connect(self)
1449 "Connect to a host on a given (SSL) port."
-> 1451 super().connect()
1453 if self._tunnel_host:

File C:\Program Files\Python311\Lib\http\client.py:945, in HTTPConnection.connect(self)
944 sys.audit("http.client.connect", self, self.host, self.port)
--> 945 self.sock = self._create_connection(
946 (self.host,self.port), self.timeout, self.source_address)
947 # Might fail in OSs that don't implement TCP_NODELAY

File C:\Program Files\Python311\Lib\socket.py:827, in create_connection(address, timeout, source_address, all_errors)
826 exceptions = []
--> 827 for res in getaddrinfo(host, port, 0, SOCK_STREAM):
828 af, socktype, proto, canonname, sa = res

File C:\Program Files\Python311\Lib\socket.py:962, in getaddrinfo(host, port, family, type, proto, flags)
961 addrlist = []
--> 962 for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
963 af, socktype, proto, canonname, sa = res

gaierror: [Errno 11001] getaddrinfo failed

During handling of the above exception, another exception occurred:

URLError Traceback (most recent call last)
Cell In[6], line 1
----> 1 X_train, Y_train, O_train = dataloader.extract_train_data(odds_type='market_maximum', drop_na_thres=1.0)

File D:\My Projects\sports-betting\sports_bet_env\Lib\site-packages\sportsbet\datasets\_soccer\_data.py:473, in SoccerDataLoader.extract_train_data(self, drop_na_thres, odds_type)
472 def extract_train_data(self, drop_na_thres=0.0, odds_type=None):
--> 473 X, Y, O = super(SoccerDataLoader, self).extract_train_data(
474 drop_na_thres, odds_type
475 )
476 self.input_cols_ = pd.Index(
477 [col for col in self.input_cols_ if col != 'data_source'], dtype=object
478 )
479 X = X.reset_index().drop_duplicates(subset=self.input_cols_)

File D:\My Projects\sports-betting\sports_bet_env\Lib\site-packages\sportsbet\datasets\_base.py:284, in _BaseDataLoader.extract_train_data(self, drop_na_thres, odds_type)
281 self._check_param_grid()
283 # Validate the data
--> 284 data = self._validate_data()
286 # Extract train data
287 data = self._extract_train_data(data)

File D:\My Projects\sports-betting\sports_bet_env\Lib\site-packages\sportsbet\datasets\_base.py:154, in _BaseDataLoader._validate_data(self)
152 def _validate_data(self):
153 """Validate the data."""
--> 154 data = self._get_data()
155 if not isinstance(data, pd.DataFrame):
156 raise TypeError(
157 'Data should be a pandas dataframe. Got '
158 f'{type(data).__name__} instead.'
159 )

File D:\My Projects\sports-betting\sports_bet_env\Lib\site-packages\sportsbet\datasets\_soccer\_data.py:451, in SoccerDataLoader._get_data(self)
438 param_grids[data_source] = [
439 {
440 param: [val]
(...)
445 if params['data_source'] == data_source
446 ]
447 if param_grids['footballdata']:
448 fd_data = (
449 _FDSoccerDataLoader(param_grids['footballdata'])
450 ._check_param_grid()
--> 451 ._validate_data()
452 .reset_index()
453 )
454 if not param_grids['fivethirtyeight']:
455 return fd_data.assign(data_source='footballdata')

File D:\My Projects\sports-betting\sports_bet_env\Lib\site-packages\sportsbet\datasets\_base.py:154, in _BaseDataLoader._validate_data(self)
152 def _validate_data(self):
153 """Validate the data."""
--> 154 data = self._get_data()
155 if not isinstance(data, pd.DataFrame):
156 raise TypeError(
157 'Data should be a pandas dataframe. Got '
158 f'{type(data).__name__} instead.'
159 )

File D:\My Projects\sports-betting\sports_bet_env\Lib\site-packages\sportsbet\datasets\_soccer\_fd.py:535, in _FDSoccerDataLoader._get_data(self)
532 urls = param_grid_to_csv_urls(self.param_grid)
533 for params, url in urls:
--> 535 data = _read_csv(url).replace('#REF!', np.nan)
536 try:
537 data['Date'] = pd.to_datetime(data['Date'], format='%d/%m/%Y')

File D:\My Projects\sports-betting\sports_bet_env\Lib\site-packages\sportsbet\datasets\_soccer\_utils.py:80, in _read_csv(url)
78 def _read_csv(url):
79 """Read csv file from U

Environment

Python 3.11.4

@clarence-ong clarence-ong added the fix Bug fix. label Mar 14, 2024
@georgedouzas
Copy link
Owner

Hi @clarence-ong,

Python 3.11 is not supported yet. Can you use Python 3.9 or 3.10 and retry? I cannot reproduce your error.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
fix Bug fix.
Projects
None yet
Development

No branches or pull requests

2 participants