In [1]:
from bs4 import BeautifulSoup
import requests

In [None]:
def get_chunks(url, timeout=30):
    """Fetch a tutorial page and split its main content into text chunks.

    The first ``<td>`` of the page is treated as the content container.
    Navigation ``<div>``s and all ``<img>`` tags are removed, then the
    remaining h2/h3/p/ul/ol/strong/div tags are walked in document order.
    A new chunk starts at every ``<h2>``/``<h3>`` that is a direct child of a
    ``<td>``; headings nested deeper are added to the chunk being built.

    Args:
        url: Absolute URL of the page to download.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        A list of strings. Section chunks are the collected pieces joined with
        single spaces; each ``div.codeblock`` is emitted as its own chunk.
        Returns an empty list when the page contains no ``<td>``.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            4xx/5xx response.
    """
    page = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors rather than chunking an error page.
    page.raise_for_status()
    soup = BeautifulSoup(page.text, "html.parser")

    # The main content lives inside table data, so work from the first <td>.
    table_data = soup.find("td")
    if table_data is None:
        # No content cell on this page: nothing to chunk.
        return []

    # Remove the "next topic" navigation blocks so their text is not scraped.
    for selector in ({"id": "bottomnextup"},
                     {"class_": "nexttopiclink"},
                     {"class_": "bottomnext"}):
        nav_block = table_data.find("div", **selector)
        if nav_block:
            nav_block.extract()

    # Images carry no text; drop them from the whole document.
    for img in soup.find_all('img'):
        img.extract()

    # Every tag that may contribute text, in document order.
    elements = table_data.find_all(['h2', 'h3', 'p', 'ul', 'li', 'strong', 'div', 'ol'])

    # Finished chunks, and the pieces of the chunk currently being built.
    chunks = []
    current_chunk = []

    def process_strong_tag(text):
        """Return ``text`` with a trailing space appended if it lacks one."""
        if text and text[-1] != ' ':
            return text + ' '
        return text

    def is_nested_list(tag):
        """Return True if ``tag`` sits inside another ul/ol within the content cell."""
        for ancestor in tag.parents:
            if ancestor is table_data:
                return False
            if ancestor.name in ('ul', 'ol'):
                return True
        return False

    for element in elements:
        parent = element.parent
        directly_under_td = parent is not None and parent.name == 'td'

        if element.name in ('h2', 'h3'):
            # A top-level heading closes the chunk collected so far.
            if directly_under_td and current_chunk:
                chunks.append(' '.join(current_chunk))
                current_chunk = []
            current_chunk.append("\n")  # Line break before the section title
            current_chunk.append(element.get_text(strip=True))

        elif element.name == 'strong':
            # <strong> inside <p>/<li> is already covered by the parent's text;
            # only a <strong> placed directly in the cell needs adding here.
            # (The previous `not element.find_parent()` test could never be
            # true for a tag inside the cell, so this branch never ran.)
            if directly_under_td:
                strong_text = element.get_text(strip=True)
                if strong_text:
                    current_chunk.append(strong_text)

        elif element.name in ('ul', 'ol'):
            # The outermost list already walks every descendant <li>, so a
            # nested list must be skipped or its items are emitted twice.
            if is_nested_list(element):
                continue
            for li in element.find_all('li'):
                this_text = li.get_text(strip=True)
                # get_text(strip=True) fuses bold text with what follows it;
                # re-insert a space after each bold run.
                for strong_tag in li.find_all('strong'):
                    strong_text = strong_tag.get_text(strip=True)
                    this_text = this_text.replace(strong_text, process_strong_tag(strong_text))
                current_chunk.append(f"\n{this_text}")

        elif element.name == 'div' and 'codeblock' in element.get('class', []):
            # Code blocks become standalone chunks. They are appended right
            # away, so they land in `chunks` ahead of the section text that is
            # still accumulating in `current_chunk`.
            code_text = element.get_text(strip=True)
            if code_text:
                chunks.append(f"\n{code_text}\n")

        elif element.name == 'p' and directly_under_td:
            # Only paragraphs that are direct children of a <td> are kept.
            current_chunk.append(f"\n{element.get_text(strip=True)}")

    # Flush whatever is left after the last heading.
    if current_chunk:
        chunks.append(' '.join(current_chunk))

    return chunks

In [None]:
# Tutorial page URLs on javatpoint.com, one absolute URL per entry; judging by
# the slugs these are the machine-learning tutorial pages.
# NOTE(review): the driver cell visible in this notebook passes url_list_4,
# url_list_2 and url_list_3 to get_csv_with_full_data() but not this list --
# confirm whether it was scraped in an earlier run.
# NOTE(review): a few slugs look misspelled (e.g. "...-Machine-mearning",
# "...-mchine-learning"); presumably they mirror the site's real URLs --
# verify before "correcting" them.
url_list_1 = [
    "https://www.javatpoint.com/applications-of-machine-learning",
    "https://www.javatpoint.com/machine-learning-life-cycle",
    "https://www.javatpoint.com/machine-learning-installing-anaconda-and-python",
    "https://www.javatpoint.com/difference-between-artificial-intelligence-and-machine-learning",
    "https://www.javatpoint.com/how-to-get-datasets-for-machine-learning",
    "https://www.javatpoint.com/data-preprocessing-machine-learning",
    "https://www.javatpoint.com/supervised-machine-learning",
    "https://www.javatpoint.com/unsupervised-machine-learning",
    "https://www.javatpoint.com/difference-between-supervised-and-unsupervised-learning",
    "https://www.javatpoint.com/regression-analysis-in-machine-learning",
    "https://www.javatpoint.com/linear-regression-in-machine-learning",
    "https://www.javatpoint.com/simple-linear-regression-in-machine-learning",
    "https://www.javatpoint.com/multiple-linear-regression-in-machine-learning",
    "https://www.javatpoint.com/backward-elimination-in-machine-learning",
    "https://www.javatpoint.com/machine-learning-polynomial-regression",
    "https://www.javatpoint.com/classification-algorithm-in-machine-learning",
    "https://www.javatpoint.com/logistic-regression-in-machine-learning",
    "https://www.javatpoint.com/k-nearest-neighbor-algorithm-for-machine-learning",
    "https://www.javatpoint.com/machine-learning-support-vector-machine-algorithm",
    "https://www.javatpoint.com/machine-learning-naive-bayes-classifier",
    "https://www.javatpoint.com/regression-vs-classification-in-machine-learning",
    "https://www.javatpoint.com/machine-learning-interview-questions",
    "https://www.javatpoint.com/linear-regression-vs-logistic-regression-in-machine-learning",
    "https://www.javatpoint.com/machine-learning-decision-tree-classification-algorithm",
    "https://www.javatpoint.com/machine-learning-random-forest-algorithm",
    "https://www.javatpoint.com/clustering-in-machine-learning",
    "https://www.javatpoint.com/hierarchical-clustering-in-machine-learning",
    "https://www.javatpoint.com/k-means-clustering-algorithm-in-machine-learning",
    "https://www.javatpoint.com/apriori-algorithm-in-machine-learning",
    "https://www.javatpoint.com/association-rule-learning",
    "https://www.javatpoint.com/confusion-matrix-in-machine-learning",
    "https://www.javatpoint.com/cross-validation-in-machine-learning",
    "https://www.javatpoint.com/data-science-vs-machine-learning",
    "https://www.javatpoint.com/machine-learning-vs-deep-learning",
    "https://www.javatpoint.com/dimensionality-reduction-technique",
    "https://www.javatpoint.com/machine-learning-algorithms",
    "https://www.javatpoint.com/overfitting-and-underfitting-in-machine-learning",
    "https://www.javatpoint.com/principal-component-analysis",
    "https://www.javatpoint.com/machine-learning-p-value",
    "https://www.javatpoint.com/regularization-in-machine-learning",
    "https://www.javatpoint.com/examples-of-machine-learning",
    "https://www.javatpoint.com/semi-supervised-learning",
    "https://www.javatpoint.com/essential-mathematics-for-machine-learning",
    "https://www.javatpoint.com/overfitting-in-machine-learning",
    "https://www.javatpoint.com/types-of-encoding-techniques",
    "https://www.javatpoint.com/feature-selection-techniques-in-machine-learning",
    "https://www.javatpoint.com/bias-and-variance-in-machine-learning",
    "https://www.javatpoint.com/machine-learning-tools",
    "https://www.javatpoint.com/prerequisites-for-machine-learning",
    "https://www.javatpoint.com/gradient-descent-in-machine-learning",
    "https://www.javatpoint.com/machine-learning-experts-salary-in-india",
    "https://www.javatpoint.com/machine-learning-models",
    "https://www.javatpoint.com/machine-learning-books",
    "https://www.javatpoint.com/linear-algebra-for-machine-learning",
    "https://www.javatpoint.com/types-of-machine-learning",
    "https://www.javatpoint.com/feature-engineering-for-machine-learning",
    "https://www.javatpoint.com/top-10-machine-learning-courses-in-2021",
    "https://www.javatpoint.com/epoch-in-machine-learning",
    "https://www.javatpoint.com/machine-learning-with-anomaly-detection",
    "https://www.javatpoint.com/epoch",
    "https://www.javatpoint.com/cost-function-in-machine-learning",
    "https://www.javatpoint.com/bayes-theorem-in-machine-learning",
    "https://www.javatpoint.com/perceptron-in-machine-learning",
    "https://www.javatpoint.com/entropy-in-machine-learning",
    "https://www.javatpoint.com/issues-in-machine-learning",
    "https://www.javatpoint.com/precision-and-recall-in-machine-learning",
    "https://www.javatpoint.com/genetic-algorithm-in-machine-learning",
    "https://www.javatpoint.com/normalization-in-machine-learning",
    "https://www.javatpoint.com/adversarial-machine-learning",
    "https://www.javatpoint.com/basic-concepts-in-machine-learning",
    "https://www.javatpoint.com/machine-learning-techniques",
    "https://www.javatpoint.com/automl",
    "https://www.javatpoint.com/demystifying-machine-learning",
    "https://www.javatpoint.com/challenges-of-machine-learning",
    "https://www.javatpoint.com/model-parameter-vs-hyperparameter",
    "https://www.javatpoint.com/hyperparameters-in-machine-learning",
    "https://www.javatpoint.com/importance-of-machine-learning",
    "https://www.javatpoint.com/machine-learning-and-cloud-computing",
    "https://www.javatpoint.com/anti-money-laundering-using-machine-learning",
    "https://www.javatpoint.com/data-science-vs-machine-learning-vs-big-data",
    "https://www.javatpoint.com/popular-machine-learning-platforms",
    "https://www.javatpoint.com/deep-learning-vs-machine-learning-vs-artificial-intelligence",
    "https://www.javatpoint.com/machine-learning-application-in-defense-military",
    "https://www.javatpoint.com/machine-learning-applications-in-media",
    "https://www.javatpoint.com/how-can-machine-learning-be-used-with-blockchain",
    "https://www.javatpoint.com/prerequisites-to-learn-artificial-intelligence-and-machine-learning",
    "https://www.javatpoint.com/list-of-machine-learning-companies-in-india",
    "https://www.javatpoint.com/mathematics-courses-for-machine-learning",
    "https://www.javatpoint.com/probability-and-statistics-books-for-machine-learning",
    "https://www.javatpoint.com/risks-of-machine-learning",
    "https://www.javatpoint.com/best-laptops-for-machine-learning",
    "https://www.javatpoint.com/machine-learning-in-finance",
    "https://www.javatpoint.com/lead-generation-using-machine-learning",
    "https://www.javatpoint.com/machine-learning-and-data-science-certification",
    "https://www.javatpoint.com/what-is-big-data-and-machine-learning",
    "https://www.javatpoint.com/how-to-save-a-machine-learning-model",
    "https://www.javatpoint.com/machine-learning-model-with-teachable-machine",
    "https://www.javatpoint.com/data-structure-for-machine-learning",
    "https://www.javatpoint.com/hypothesis-in-machine-learning",
    "https://www.javatpoint.com/gaussian-discriminant-analysis",
    "https://www.javatpoint.com/how-machine-learning-is-used-by-famous-companies",
    "https://www.javatpoint.com/transfer-learning-in-machine-learning",
    "https://www.javatpoint.com/linear-discriminant-analysis-in-machine-learning",
    "https://www.javatpoint.com/stacking-in-machine-learning",
    "https://www.javatpoint.com/complement-naive-bayes-algorithm",
    "https://www.javatpoint.com/deploy-a-machine-learning-model-using-streamlit-library",
    "https://www.javatpoint.com/different-types-of-methods-for-clustering-algorithms-in-ml",
    "https://www.javatpoint.com/em-algorithm-in-machine-learning",
    "https://www.javatpoint.com/machine-learning-pipeline",
    "https://www.javatpoint.com/exploitation-and-exploration-in-machine-learning",
    "https://www.javatpoint.com/machine-learning-for-trading",
    "https://www.javatpoint.com/data-augmentation-a-tactic-to-improve-the-performance-of-ml",
    "https://www.javatpoint.com/difference-between-coding-in-data-science-and-machine-learning",
    "https://www.javatpoint.com/data-labelling-in-machine-learning",
    "https://www.javatpoint.com/impact-of-deep-learning-on-personalization",
    "https://www.javatpoint.com/major-business-applications-of-cnvolutional-neural-network",
    "https://www.javatpoint.com/mini-batch-k-means-clustering-algorithm",
    "https://www.javatpoint.com/what-is-multilevel-modelling",
    "https://www.javatpoint.com/gbm-in-machine-learning",
    "https://www.javatpoint.com/back-propagation-through-time-rnn",
    "https://www.javatpoint.com/data-preparation-in-machine-learning",
    "https://www.javatpoint.com/predictive-maintenance-using-machine-learning",
    "https://www.javatpoint.com/nlp-analysis-of-restaurant-reviews",
    "https://www.javatpoint.com/what-are-lstm-networks",
    "https://www.javatpoint.com/performance-metrics-in-machine-learning",
    "https://www.javatpoint.com/optimization-using-hopfield-network",
    "https://www.javatpoint.com/data-leakage-in-machine-learning",
    "https://www.javatpoint.com/generative-adversarial-network",
    "https://www.javatpoint.com/machine-learning-for-data-management",
    "https://www.javatpoint.com/tensor-processing-units",
    "https://www.javatpoint.com/train-and-test-datasets-in-machine-learning",
    "https://www.javatpoint.com/how-to-start-with-Machine-mearning",
    "https://www.javatpoint.com/auc-roc-curve-in-machine-learning",
    "https://www.javatpoint.com/targeted-advertising-using-machine-learning",
    "https://www.javatpoint.com/top-10-machine-learning-projects-for-beginners-using-python",
    "https://www.javatpoint.com/what-is-human-in-the-loop-machine-learning",
    "https://www.javatpoint.com/what-is-mlops",
    "https://www.javatpoint.com/k-medoids-clustering-theoretical-explanation",
    "https://www.javatpoint.com/machine-learning-or-software-development-which-is-better",
    "https://www.javatpoint.com/how-does-machine-learning-work",
    "https://www.javatpoint.com/how-to-learn-machine-learning-from-scratch",
    "https://www.javatpoint.com/is-machine-learning-hard",
    "https://www.javatpoint.com/face-recognition-in-mchine-learning",
    "https://www.javatpoint.com/product-recommendation-machine-learning",
    "https://www.javatpoint.com/designing-a-learning-system-in-machine-learning",
    "https://www.javatpoint.com/recommendation-system-machine-learning",
    "https://www.javatpoint.com/customer-segmentation-using-machine-learning",
    "https://www.javatpoint.com/detecting-phishing-websites-using-machine-learning",
    "https://www.javatpoint.com/hidden-markov-model-in-machine-learning",
    "https://www.javatpoint.com/sales-prediction-using-machine-learning",
    "https://www.javatpoint.com/crop-yield-prediction-using-machine-learning",
    "https://www.javatpoint.com/data-visualization-in-machine-learning",
    "https://www.javatpoint.com/elm-in-machine-learning",
    "https://www.javatpoint.com/probabilistic-model-in-machine-learning",
    "https://www.javatpoint.com/survival-analysis-using-machine-learning",
    "https://www.javatpoint.com/traffic-prediction-using-machine-learning",
    "https://www.javatpoint.com/t-sne-in-machine-learning",
    "https://www.javatpoint.com/bert-language-model",
    "https://www.javatpoint.com/federated-learning-in-machine-learning",
    "https://www.javatpoint.com/deep-parametric-continuous-convolutional-neural-network",
    "https://www.javatpoint.com/depth-wise-separable-convolutional-neural-networks",
    "https://www.javatpoint.com/need-for-data-structures-and-algorithms-for-deep-learning-and-machine-learning",
    "https://www.javatpoint.com/geometric-model-in-machine-learning",
    "https://www.javatpoint.com/machine-learning-in-design",
    "https://www.javatpoint.com/digit-recognition-using-machine-learning",
    "https://www.javatpoint.com/electricity-consumption-prediction-using-machine-learning",
    "https://www.javatpoint.com/data-analytics-vs-machine-learning",
    "https://www.javatpoint.com/injury-prediction-in-competitive-runners-using-machine-learning",
    "https://www.javatpoint.com/protein-folding-using-machine-learning",
    "https://www.javatpoint.com/sentiment-analysis-using-machine-learning",
    "https://www.javatpoint.com/network-intrusion-detection-system-using-machine-learning",
    "https://www.javatpoint.com/titanic-machine-learning-from-disaster",
    "https://www.javatpoint.com/adenovirus-disease-prediction-for-child-healthcare-using-machine-learning",
    "https://www.javatpoint.com/rnn-for-sequence-labelling",
    "https://www.javatpoint.com/catboost-in-machine-learning",
    "https://www.javatpoint.com/cloud-computing-future-trends",
    "https://www.javatpoint.com/histogram-of-oriented-gradients",
    "https://www.javatpoint.com/implementation-of-neural-network-from-scratch-using-numpy",
    "https://www.javatpoint.com/introduction-to-sift",
    "https://www.javatpoint.com/introduction-to-surf",
    "https://www.javatpoint.com/kubernetes-load-balancing-service",
    "https://www.javatpoint.com/kubernetes-resource-model-and-how-to-make-use-of-yaml",
    "https://www.javatpoint.com/are-robots-self-learning",
    "https://www.javatpoint.com/variational-autoencoders",
    "https://www.javatpoint.com/what-are-the-security-and-privacy-risks-of-vr-and-ar",
    "https://www.javatpoint.com/what-is-a-large-language-model",
    "https://www.javatpoint.com/privacy-preserving-machine-learning",
    "https://www.javatpoint.com/continual-learning-in-machine-learning",
    "https://www.javatpoint.com/quantum-machine-learning",
    "https://www.javatpoint.com/split-single-column-into-multiple-columns-in-pyspark-dataframe",
    "https://www.javatpoint.com/why-should-we-use-automl",
    "https://www.javatpoint.com/evaluation-metrics-for-object-detection-and-recognition",
    "https://www.javatpoint.com/mean-intersection-over-union-for-image-segmentation",
    "https://www.javatpoint.com/yolov5-object-tracker-in-videos",
    "https://www.javatpoint.com/predicting-salaries-with-machine-learning",
    "https://www.javatpoint.com/fine-tuning-large-language-omdels",
    "https://www.javatpoint.com/automl-workflow",
    "https://www.javatpoint.com/build-chatbot-webapp-with-langchain",
    "https://www.javatpoint.com/building-a-machine-learning-classification-model-with-pycaret",
    "https://www.javatpoint.com/continuous-bag-of-words-in-nlp",
    "https://www.javatpoint.com/deploying-scrapy-spider-on-scrapinghub",
    "https://www.javatpoint.com/dynamic-pricing-using-machine-learning",
    "https://www.javatpoint.com/how-to-improve-neural-networks-by-using-complex-numbers",
    "https://www.javatpoint.com/introduction-to-bayesian-deep-learning",
    "https://www.javatpoint.com/lidar-light-detection-and-ranging-for-3d-reconstruction",
    "https://www.javatpoint.com/meta-learning-in-machine-learning",
    "https://www.javatpoint.com/geomagnetic-field-using-machine-learning",
    "https://www.javatpoint.com/image-generation-using-machine-learning",
    "https://www.javatpoint.com/confidence-intervals",
    "https://www.javatpoint.com/facebook-prophet",
    "https://www.javatpoint.com/understanding-optimization-algorithms-in-machine-learning",
    "https://www.javatpoint.com/what-are-probabilistic-models-in-machine-learning",
    "https://www.javatpoint.com/how-to-choose-the-best-linear-regression-model",
    "https://www.javatpoint.com/how-to-remove-non-stationarity-from-time-series",
    "https://www.javatpoint.com/autoencoders",
    "https://www.javatpoint.com/cat-classification-using-machine-learning",
    "https://www.javatpoint.com/aic-and-bic",
    "https://www.javatpoint.com/inception-model",
    "https://www.javatpoint.com/architecture-of-machine-learning",
    "https://www.javatpoint.com/business-intelligence-vs-machine-learning",
    "https://www.javatpoint.com/guide-to-cluster-analysis-applications-best-practices",
    "https://www.javatpoint.com/linear-regression-using-gradient-descent",
    "https://www.javatpoint.com/text-clustering-with-k-means",
    "https://www.javatpoint.com/the-significance-and-applications-of-covariance-matrix",
    "https://www.javatpoint.com/stationarity-tests-in-time-series",
    "https://www.javatpoint.com/graph-machine-learning",
    "https://www.javatpoint.com/introduction-to-xgboost-algorithm-in-machine-learning",
    "https://www.javatpoint.com/bahdanau-attention",
    "https://www.javatpoint.com/w-gan",
    "https://www.javatpoint.com/greedy-layer-wise-pre-training",
    "https://www.javatpoint.com/onevsrestclassifier",
    "https://www.javatpoint.com/best-program-for-machine-learning",
    "https://www.javatpoint.com/deep-boltzmann-machines-dbms-in-machine-learning",
    "https://www.javatpoint.com/find-patterns-in-data-using-machine-learning"]

In [29]:
# Further javatpoint.com tutorial page URLs (statistics / machine-learning
# topics, judging by the slugs). This is the first list the driver cell hands
# to get_csv_with_full_data(), right before it prints "MLDONE".
url_list_4 = [
    "https://www.javatpoint.com/generalized-linear-models",
    "https://www.javatpoint.com/how-to-implement-gradient-descent-optimization-from-scratch",
    "https://www.javatpoint.com/interpreting-correlation-coefficients",
    "https://www.javatpoint.com/eigenfaces",
    "https://www.javatpoint.com/image-captioning-using-machine-learning",
    "https://www.javatpoint.com/fit-vs-predict-vs-fit-predict-in-python-scikit-learn",
    "https://www.javatpoint.com/cnn-filters",
    "https://www.javatpoint.com/shannon-entropy",
    "https://www.javatpoint.com/time-series-exponential-smoothing",
    "https://www.javatpoint.com/vector-norms-in-machine-learning",
    "https://www.javatpoint.com/swarm-intelligence",
    "https://www.javatpoint.com/l1-and-l2-regularization-methods-in-machine-learning",
    "https://www.javatpoint.com/ml-approaches-for-time-series",
    "https://www.javatpoint.com/mse-and-bias-variance-decomposition",
    "https://www.javatpoint.com/relu",
    "https://www.javatpoint.com/simple-exponential-smoothing",
    "https://www.javatpoint.com/tf-idf",
    "https://www.javatpoint.com/how-to-optimise-machine-learning-model",
    "https://www.javatpoint.com/multiclass-logistic-regression-from-scratch",
    "https://www.javatpoint.com/lightbm-multilabel-classification",
    "https://www.javatpoint.com/monte-carlo-methods",
    "https://www.javatpoint.com/what-is-inverse-reinforcement-learning",
    "https://www.javatpoint.com/content-based-recommender-system",
    "https://www.javatpoint.com/context-awareness-recommender-system",
    "https://www.javatpoint.com/predicting-flights-using-machine-learning",
    "https://www.javatpoint.com/ntlk-corpus",
    "https://www.javatpoint.com/traditional-feature-engineering-models",
    "https://www.javatpoint.com/concept-drift-and-model-decay-in-machine-learning",
    "https://www.javatpoint.com/hierarchical-reinforcement-learning",
    "https://www.javatpoint.com/what-is-feature-scaling-and-why-is-it-important-in-machine-learning",
    "https://www.javatpoint.com/difference-between-statistical-model-and-machine-learning",
    "https://www.javatpoint.com/introduction-to-ranking-algorithms-in-machine-learning",
    "https://www.javatpoint.com/multicollinearity-causes-effects-and-detection",
    "https://www.javatpoint.com/bag-of-n-grams-model"
]

In [17]:
# javatpoint.com tutorial page URLs for the artificial-intelligence track
# (judging by the slugs). Passed to get_csv_with_full_data() by the driver
# cell after url_list_4.
url_list_2 = ["https://www.javatpoint.com/artificial-intelligence-ai", "https://www.javatpoint.com/application-of-ai",
        "https://www.javatpoint.com/history-of-artificial-intelligence", "https://www.javatpoint.com/types-of-artificial-intelligence",
        "https://www.javatpoint.com/types-of-ai-agents", "https://www.javatpoint.com/agents-in-ai", "https://www.javatpoint.com/agent-environment-in-ai", 
        "https://www.javatpoint.com/turing-test-in-ai","https://www.javatpoint.com/search-algorithms-in-ai", "https://www.javatpoint.com/ai-uninformed-search-algorithms", 
        "https://www.javatpoint.com/ai-informed-search-algorithms", "https://www.javatpoint.com/hill-climbing-algorithm-in-ai", "https://www.javatpoint.com/means-ends-analysis-in-ai",
        "https://www.javatpoint.com/ai-adversarial-search", "https://www.javatpoint.com/ai-alpha-beta-pruning", "https://www.javatpoint.com/knowledge-based-agent-in-ai",
        "https://www.javatpoint.com/knowledge-representation-in-ai", "https://www.javatpoint.com/ai-techniques-of-knowledge-representation", 
        "https://www.javatpoint.com/propositional-logic-in-artificial-intelligence", "https://www.javatpoint.com/rules-of-inference-in-artificial-intelligence",
        "https://www.javatpoint.com/the-wumpus-world-in-artificial-intelligence", "https://www.javatpoint.com/ai-knowledge-base-for-wumpus-world", 
        "https://www.javatpoint.com/first-order-logic-in-artificial-intelligence", "https://www.javatpoint.com/ai-knowledge-engineering-in-first-order-logic",
        "https://www.javatpoint.com/ai-inference-in-first-order-logic", "https://www.javatpoint.com/ai-unification-in-first-order-logic","https://www.javatpoint.com/forward-chaining-and-backward-chaining-in-ai",
        "https://www.javatpoint.com/reasoning-in-artificial-intelligence","https://www.javatpoint.com/difference-between-inductive-and-deductive-reasoning","https://www.javatpoint.com/probabilistic-reasoning-in-artifical-intelligence",
        "https://www.javatpoint.com/bayes-theorem-in-artifical-intelligence","https://www.javatpoint.com/bayesian-belief-network-in-artificial-intelligence","https://www.javatpoint.com/subsets-of-ai","https://www.javatpoint.com/expert-systems-in-artificial-intelligence",
         ]

In [18]:
# javatpoint.com tutorial page URLs for the data-science track (judging by the
# slugs). This is the last list the driver cell passes to
# get_csv_with_full_data().
url_list_3 = ["https://www.javatpoint.com/data-mesh-rethinking-enterprise-data-architecture","https://www.javatpoint.com/powerful-data-collection-tools-in-healthcare","https://www.javatpoint.com/workflow-of-data-analytics","https://www.javatpoint.com/life-cycle-phases-of-data-analytics","https://www.javatpoint.com/model-planning-for-data-analytics","https://www.javatpoint.com/real-time-analytics-in-big-data","https://www.javatpoint.com/what-is-a-generative-adversarial-network","https://www.javatpoint.com/what-is-data-analysis","https://www.javatpoint.com/stylegan-style-generative-adversarial-networks","https://www.javatpoint.com/what-is-univariate-bivariate-and-multivariate-analysis-in-data-visualisation","https://www.javatpoint.com/what-is-amazon-glacier","https://www.javatpoint.com/what-is-dall-e","https://www.javatpoint.com/scope-of-data-science-in-india","https://www.javatpoint.com/skills-to-become-a-data-scientist","https://www.javatpoint.com/data-science-in-digital-marketing",
"https://www.javatpoint.com/how-to-make-a-career-in-data-science","https://www.javatpoint.com/top-data-science-programming-languages","https://www.javatpoint.com/data-science-for-weather-prediction","https://www.javatpoint.com/data-science-skills-to-boost-your-career","https://www.javatpoint.com/hadoop-for-data-science","https://www.javatpoint.com/machine-learning-for-data-science","https://www.javatpoint.com/career-opportunities-in-data-science","https://www.javatpoint.com/data-science-use-cases","https://www.javatpoint.com/role-of-sql-in-data-science","https://www.javatpoint.com/r-for-data-science","https://www.javatpoint.com/how-to-get-your-first-job-in-data-science","https://www.javatpoint.com/top-data-science-jobs-trends","https://www.javatpoint.com/r-vs-python-vs-sas-for-data-science","https://www.javatpoint.com/data-science-process","https://www.javatpoint.com/nlp-for-data-science","https://www.javatpoint.com/sas-for-data-science","https://www.javatpoint.com/top-data-science-algorithms","https://www.javatpoint.com/big-data-as-a-service",
"https://www.javatpoint.com/data-science-in-agriculture","https://www.javatpoint.com/data-security","https://www.javatpoint.com/what-is-data-analytics","https://www.javatpoint.com/a-day-in-the-life-of-a-data-scientist","https://www.javatpoint.com/data-science-and-predictive-analytics","https://www.javatpoint.com/data-science-vs-business-analytics","https://www.javatpoint.com/basic-statistics-concepts-for-data-science","https://www.javatpoint.com/what-is-a-data-hub","https://www.javatpoint.com/exploratory-data-analysis","https://www.javatpoint.com/data-science-techniques","https://www.javatpoint.com/data-types-in-statistics","https://www.javatpoint.com/design-principles-in-system-design","https://www.javatpoint.com/web-development-vs-data-science","https://www.javatpoint.com/calculus-in-data-science-and-its-uses","https://www.javatpoint.com/what-do-data-science-managers-do","https://www.javatpoint.com/data-analysis-tools-for-beginners-and-experts","https://www.javatpoint.com/how-to-convert-json-into-a-pandas-dataframe","https://www.javatpoint.com/understanding-the-derivative-of-the-sigmoid-functions",
"https://www.javatpoint.com/what-are-categorical-data-encoding-methods","https://www.javatpoint.com/bernoulli-trials-and-binomial-distribution","https://www.javatpoint.com/derivation-of-cross-entropy-function","https://www.javatpoint.com/empirical-cumulative-distribution-function-cdf-plots","https://www.javatpoint.com/linear-programming-definition-methods-and-problems","https://www.javatpoint.com/data-ingestion","https://www.javatpoint.com/mean-average-precision-map","https://www.javatpoint.com/python-libraries-for-extracting-text-from-images","https://www.javatpoint.com/the-harsh-reality-of-being-a-data-scientist","https://www.javatpoint.com/data-analyst-vs-data-scientist","https://www.javatpoint.com/sculpting-data","https://www.javatpoint.com/what-is-a-data-evangelist","https://www.javatpoint.com/api-for-data","https://www.javatpoint.com/pandas-vs-sql-for-data-analysis","https://www.javatpoint.com/8-types-of-bias-in-data-analysis-and-how-to-avoid-them","https://www.javatpoint.com/ai-transformer","https://www.javatpoint.com/cnn-layers","https://www.javatpoint.com/aws-solution-architect-vs-developer",
"https://www.javatpoint.com/bias-in-data-collection","https://www.javatpoint.com/client-server-pattern","https://www.javatpoint.com/airflow-machine-learning","https://www.javatpoint.com/artificial-intelligence-competition","https://www.javatpoint.com/audio-machine-learning","https://www.javatpoint.com/automatic-question-answer-data-science","https://www.javatpoint.com/bagging-decision-tree","https://www.javatpoint.com/what-is-vectorization","https://www.javatpoint.com/top-3-ways-to-get-started-with-dataops-pipelines","https://www.javatpoint.com/6-machine-learning-algorithms-anyone-learning-data-science-should-know","https://www.javatpoint.com/how-to-deal-with-missing-data","https://www.javatpoint.com/cnn-calculations","https://www.javatpoint.com/color-palette-seaborn","https://www.javatpoint.com/ai-box-experiment","https://www.javatpoint.com/causal-tree","https://www.javatpoint.com/python-filesystem","https://www.javatpoint.com/time-series-transformer","https://www.javatpoint.com/5-changepoint-detection-algorithms-every-data-scientist-should-know","https://www.javatpoint.com/latent-methods-for-dimension-reduction-and-topic-modeling",
"https://www.javatpoint.com/similarity-and-dissimilarity-measures-in-data-science","https://www.javatpoint.com/20-pandas-tips-and-tricks-for-beginners","https://www.javatpoint.com/types-of-predictive-models-in-data-science","https://www.javatpoint.com/what-is-aws-glue","https://www.javatpoint.com/key-components-of-a-well-written-data-model","https://www.javatpoint.com/20-questions-to-ask-prior-to-starting-data-analysis","https://www.javatpoint.com/how-to-add-a-new-column-to-a-pyspark-dataframe","https://www.javatpoint.com/useful-pip-commands-for-data-science","https://www.javatpoint.com/best-jupyterlab-extensions","https://www.javatpoint.com/big-data-for-small-companies","https://www.javatpoint.com/cloud-computing-for-dummies","https://www.javatpoint.com/correlation-does-not-imply-causation","https://www.javatpoint.com/data-processing-architectures-for-big-data-lambda-and-kappa","https://www.javatpoint.com/what-is-supply-chain-analytics","https://www.javatpoint.com/3-easy-ways-to-deploy-your-streamlit-web-app-online","https://www.javatpoint.com/3-ways-to-load-csv-files-into-colab","https://www.javatpoint.com/4-pre-trained-cnn-models-to-use-for-computer-vision-with-transfer-learning",
"https://www.javatpoint.com/5-data-skills-to-move-from-junior-to-senior-data-scientist","https://www.javatpoint.com/7-reasons-why-you-should-use-jupyterlab-for-data-science","https://www.javatpoint.com/advanced-prompt-engineering","https://www.javatpoint.com/data-management-skills","https://www.javatpoint.com/gradient-boosting-vs-random-forest","https://www.javatpoint.com/graphs-networks-and-algorithms","https://www.javatpoint.com/how-to-effectively-showcase-personal-projects-on-your-data-science-resume","https://www.javatpoint.com/interpreting-acf-and-pacf-plots-for-time-series-analysis","https://www.javatpoint.com/non-technical-skills-in-data-analytics","https://www.javatpoint.com/project-management-skills-and-frameworks-for-data-scientists","https://www.javatpoint.com/understanding-nonlinear-regression-with-examples","https://www.javatpoint.com/what-are-the-most-common-mistakes-to-avoid-when-working-with-time-series-data-sources","https://www.javatpoint.com/what-is-a-senior-data-scientist","https://www.javatpoint.com/data-pipeline-scheduling-strategies-for-data-science","https://www.javatpoint.com/descriptive-analytics","https://www.javatpoint.com/7-steps-to-ensure-and-sustain-data-quality",
"https://www.javatpoint.com/6-nlp-techniques-every-data-scientist-should-know","https://www.javatpoint.com/6-predictive-models-every-beginner-data-scientist-should-master","https://www.javatpoint.com/best-free-websites-to-learn-programming","https://www.javatpoint.com/top-10-sectors-making-use-of-big-data-analytics","https://www.javatpoint.com/3-ways-to-aggregate-data-in-pyspark","https://www.javatpoint.com/5-ways-to-apply-data-science-to-real-estate","https://www.javatpoint.com/23-great-pandas-codes-for-data-scientists","https://www.javatpoint.com/graph-neural-networks-with-pytorch-and-pytorch-geometric","https://www.javatpoint.com/data-visualization-grammar-of-graphics","https://www.javatpoint.com/matplotlib-subfigures","https://www.javatpoint.com/multivariate-time-series-analysis","https://www.javatpoint.com/sql-problems-for-data-science","https://www.javatpoint.com/five-ways-to-detect-outliers-anomalies-that-every-data-scientist-should-know-python-code","https://www.javatpoint.com/a-beginners-guide-to-the-data-science-pipeline","https://www.javatpoint.com/a-beginners-guide-to-database-reliability-engineering","https://www.javatpoint.com/optimizing-data-warehousing","https://www.javatpoint.com/time-series-forecasting-methods",
"https://www.javatpoint.com/data-scientific-method","https://www.javatpoint.com/top-techniques-to-handle-missing-values-every-data-scientist-should-know","https://www.javatpoint.com/data-engineer-vs-data-scientist","https://www.javatpoint.com/data-similarity-metrics","https://www.javatpoint.com/some-essential-numerical-summaries-in-statistics-for-data-science"]

In [None]:
import csv
def append_in_csv(text, csv_path='javaTpoint.csv'):
    """Append ``text`` as a one-column row to a CSV file unless it is already present.

    The file is read first and the first column of every non-empty row is
    collected. ``text`` is appended only when it does not match any of those
    values. If the file does not exist it is created with ``text`` as its
    first row and a message is printed.

    Args:
        text: The string to store as a single-column row.
        csv_path: Path of the CSV file to read and append to. Defaults to
            ``'javaTpoint.csv'`` in the current working directory.

    Returns:
        None.
    """
    try:
        with open(csv_path, mode='r', newline='', encoding='utf-8') as file:
            # csv.reader yields an empty list for a blank line; skip those so
            # taking the first column cannot raise IndexError.
            existing_rows = {row[0] for row in csv.reader(file) if row}
    except FileNotFoundError:
        # If the CSV file doesn't exist yet, create it and write the first row
        with open(csv_path, mode='a', newline='', encoding='utf-8') as file_append:
            csv.writer(file_append).writerow([text])
        print(f"Created file and added first row: {text}")
        return

    # The read handle is closed at this point, so the file is never open for
    # reading and appending at the same time.
    if text not in existing_rows:
        with open(csv_path, mode='a', newline='', encoding='utf-8') as file_append:
            csv.writer(file_append).writerow([text])

def get_csv_with_full_data(url_list):
    """Store every chunk of every page in ``url_list`` in the CSV file.

    Each URL is printed before it is processed. The chunks returned by
    ``get_chunks`` for that URL are passed one by one, in order, to
    ``append_in_csv``.
    """
    for page_url in url_list:
        print(page_url)
        for section in get_chunks(page_url):
            append_in_csv(section)
        
# Short tutorial pages: the first chunk of each page is skipped and only the
# remaining chunks are stored.
short_tutorial_urls = [
    "https://www.javatpoint.com/reinforcement-learning",
    "https://www.javatpoint.com/nlp",
]
for url in short_tutorial_urls:
    print(url)
    chunks = get_chunks(url)
    for chunk in chunks[1:]:
        append_in_csv(chunk)

# Remaining URL lists are stored in full; "MLDONE" is printed once the first
# of them (url_list_4) has been processed.
get_csv_with_full_data(url_list_4)
print("MLDONE")
get_csv_with_full_data(url_list_2)
get_csv_with_full_data(url_list_3)

https://www.javatpoint.com/reinforcement-learning
https://www.javatpoint.com/nlp
https://www.javatpoint.com/generalized-linear-models
https://www.javatpoint.com/how-to-implement-gradient-descent-optimization-from-scratch
https://www.javatpoint.com/interpreting-correlation-coefficients
https://www.javatpoint.com/eigenfaces
https://www.javatpoint.com/image-captioning-using-machine-learning
https://www.javatpoint.com/fit-vs-predict-vs-fit-predict-in-python-scikit-learn
https://www.javatpoint.com/cnn-filters
https://www.javatpoint.com/shannon-entropy
https://www.javatpoint.com/time-series-exponential-smoothing
https://www.javatpoint.com/vector-norms-in-machine-learning
https://www.javatpoint.com/swarm-intelligence
https://www.javatpoint.com/l1-and-l2-regularization-methods-in-machine-learning
https://www.javatpoint.com/ml-approaches-for-time-series
https://www.javatpoint.com/mse-and-bias-variance-decomposition
https://www.javatpoint.com/relu
https://www.javatpoint.com/simple-exponential-sm