# Red some website contents and respective classification data, train multiples AI and predict the website category.

## This will be a simple solution using CountVectorizer and non complex text processing. The training data is not big enough to good generalization, but doing right decisions you can have a good predictor, at least something better than random prediction :)

## Import basic libraries:

In [1]:
import pandas as pd
import numpy as np
import collections as cll
import nltk
from nltk import tokenize

## Read the dataset

In [2]:
dataset = pd.read_csv('website_classification.csv',usecols=['cleaned_website_text','Category'])
y = dataset.iloc[:,-1].values

In [3]:
dataset.head()

Unnamed: 0,cleaned_website_text,Category
0,official site good hotel accommodation big sav...,Travel
1,expedia hotel book sites like use vacation wor...,Travel
2,tripadvisor hotel book sites like previously d...,Travel
3,cheap flights search compare flights momondo f...,Travel
4,bot create free account create free account si...,Travel


## Print the first site content

In [4]:
print(dataset.iloc[0,0])

official site good hotel accommodation big saving hotel destination worldwide browse hotel review find guarantee good price hotel budget lodging accommodation hotel hotels special offer package special weekend break city break deal budget cheap discount saving select language find deal hotel home try search connect traveller india travel talk community recommend destination flamborough boreland colvend catfield harberton warleggan inspiration trip spot winter wildlife beautiful snowy island bye bye work want spontechnaity tech drive travel vital value maximise travel homes guest love browse property type hotels apartments resorts villa cabins cottage glamping serviced apartment holiday home guest house hostels motels ryokans riads holiday park homestays campsites country house farm stay boats luxury tent self catering accommodation tiny house chapel saint leonards wuqing wuchang saint eval great rowsley instow verified review real guest work start booking follow trip finally review ski

In [5]:
print(y)

['Travel' 'Travel' 'Travel' ... 'Adult' 'Adult' 'Adult']


## Check the balance of the categories

In [6]:
cll.Counter(y)

Counter({'Travel': 107,
         'Social Networking and Messaging': 83,
         'News': 96,
         'Streaming Services': 105,
         'Sports': 104,
         'Photography': 93,
         'Law and Government': 84,
         'Health and Fitness': 96,
         'Games': 98,
         'E-Commerce': 102,
         'Forums': 16,
         'Food': 92,
         'Education': 114,
         'Computers and Technology': 93,
         'Business/Corporate': 109,
         'Adult': 16})

## Remove Forums and Adult, they are not balanced, so they can decrease the accuracy

In [7]:
dataset = dataset[dataset.Category != 'Forums' ]
dataset = dataset[dataset.Category != 'Adult' ] 
y = dataset.iloc[:,-1].values

## Create the bag of words structure

In [8]:
from sklearn.feature_extraction.text import CountVectorizer

In [9]:
cv = CountVectorizer(max_features = 1322)
X = cv.fit_transform(dataset['cleaned_website_text']).toarray()

In [10]:
print(X)

[[0 0 0 ... 0 0 0]
 [0 0 1 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


## Import the AI algorithms to do classification

In [11]:
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier

## Create the model_params to explore and find the best AI algorithm using GridSearchCV

In [12]:
model_params = {
    'svm' : { 
        'model' : svm.SVC(gamma='auto'),
        'params' : {
            'C' : [ 1, 5, 10, 20],
            'kernel' : [ 'rbf', 'linear' ]
        }
    },
    'random_forest' : {
        'model' : RandomForestClassifier(),
        'params' : {
            'criterion' : [ 'gini', 'entropy' ],
            'n_estimators' : [ 1, 5, 10 , 15]
        }
    },
    'logistic_regression' : {
        'model' : LogisticRegression(multi_class='auto',max_iter=5000,dual=False),
        'params' : {
            'penalty' : [ 'l2' ],
            'C': [ 1, 2, 5, 10],
            'solver' : [ 'lbfgs' ]
        }
    },
    'naive_bayes_gaussiannb' : { 
        'model' : GaussianNB(), 
        'params' : {}
    },
    'naive_bayes_multinomialNB' : {
        'model' : MultinomialNB(),
        'params' : {
            'alpha' : [ 1, 2, 5, 10 ], 
            'fit_prior' : [ 0, 1 ]
        }
    },
    'decision_tree_classifier' : {
        'model' : DecisionTreeClassifier(),
        'params' : {
            'criterion' : [ 'gini', 'entropy' ],
            'splitter' : [ 'best' , 'random' ]
        }
    }
}

## Print the models

In [13]:
for item in model_params:
    print(item)

svm
random_forest
logistic_regression
naive_bayes_gaussiannb
naive_bayes_multinomialNB
decision_tree_classifier


## Print the models and their parameters

In [14]:
for model_name, params in model_params.items():
    print(model_name)
    print(params)

svm
{'model': SVC(gamma='auto'), 'params': {'C': [1, 5, 10, 20], 'kernel': ['rbf', 'linear']}}
random_forest
{'model': RandomForestClassifier(), 'params': {'criterion': ['gini', 'entropy'], 'n_estimators': [1, 5, 10, 15]}}
logistic_regression
{'model': LogisticRegression(max_iter=5000), 'params': {'penalty': ['l2'], 'C': [1, 2, 5, 10], 'solver': ['lbfgs']}}
naive_bayes_gaussiannb
{'model': GaussianNB(), 'params': {}}
naive_bayes_multinomialNB
{'model': MultinomialNB(), 'params': {'alpha': [1, 2, 5, 10], 'fit_prior': [0, 1]}}
decision_tree_classifier
{'model': DecisionTreeClassifier(), 'params': {'criterion': ['gini', 'entropy'], 'splitter': ['best', 'random']}}


## Implement the GridSearchCV to find the best classifier

In [15]:
from sklearn.model_selection import GridSearchCV

In [16]:
# implement the GridSearchCV(estimator, param_grid, cv=5, return_train_score=False)
scores = []
for model_name, model_items in model_params.items():
    grid_clf = GridSearchCV(model_items['model'], model_items['params'], cv=5, return_train_score=False)

    grid_clf.fit(X, y)
    
    # append the score
    scores.append({
        'model' : model_name,
        'best score' : grid_clf.best_score_,
        'best param' : grid_clf.best_params_
    })

## Create pandas dataframe and add the colums to make it simple to read, then sort from the best score to the lowest

In [17]:
df = pd.DataFrame(scores, columns = [ 'model', 'best score', 'best param' ])

In [18]:
df.sort_values(by=['best score'], ascending=False).head()

Unnamed: 0,model,best score,best param
4,naive_bayes_multinomialNB,0.881552,"{'alpha': 2, 'fit_prior': 0}"
2,logistic_regression,0.818306,"{'C': 1, 'penalty': 'l2', 'solver': 'lbfgs'}"
1,random_forest,0.739083,"{'criterion': 'gini', 'n_estimators': 15}"
0,svm,0.696195,"{'C': 1, 'kernel': 'linear'}"
5,decision_tree_classifier,0.569781,"{'criterion': 'gini', 'splitter': 'best'}"


## MultinomialNB got the best score (88.1552%). Lets try to classify a random site text with that algorithm.

## Create the classifer object:

In [19]:
classifier = MultinomialNB(alpha=2, fit_prior=0)
classifier.fit(X, y)

## Get some random news text under CNN

In [20]:
text_cnn_news = 'CNN news The Amazon is a place that both Brazil’s former military regime and President Jair Bolsonaro have eyed with dreams of development, looking to take advantage of bountiful resources. It’s a place where Bolsonaro’s deconstruction of state institutions is wreaking havoc. Where illegal and armed actors are pushing into formerly protected areas and plundering the land to make a quick profit. Where Indigenous people are constantly under threat, whether staring down the barrel of a gun, fires, or COVID, and their territories and their way of life are under attack — the invaders slowly eating away at the forests that have given them life for generations. And where Bolsonaro has been empowering those behind the devastation. There is no other place in Brazil that better symbolizes the destruction that Bolsonaro has wrought on the country. The Amazon is under attack. The fires are just the most visible sign. This is Brazil on Fire, a podcast about Brazil’s descent toward fascism under President Jair Bolsonaro. Over these six episodes we look at Bolsonaro’s far-right government that has set the country ablaze, and how the United States helped him do it. We’ll visit the birthplace of Brazilian Nazism, evangelical churches, and Indigenous villages in the Amazon.  Hosted by Latin America-based journalist Michael Fox. This podcast is produced in partnership between The Real News Network and NACLA. Edited by Heather Gies. Sound design by Gustavo Türck.Theme music by Monte Perdido. TRANSCRIPT Michael Fox:  São Paulo, Brazil. August 19, 2019. It’s mid-afternoon, but the sky is as dark as night. Street lamps turn on. Car headlights. Residents across the city stare up at the sky. Several videos like this go viral across the country. It’s an apocalyptic scene. See, thousands of fires are blazing across the Amazon, more than a thousand miles away. And the smoke has engulfed Brazil’s financial capital.  This scene draws the nation’s attention to the Amazon. Within days, protests ripple across the country, demanding action to stop the fires. You probably remember this – It was the front page story for a few days.  The fires have hit epic proportions. And President Jair Bolsonaro is to blame. That’s what they’re chanting here in my town of Florianopolis in one of dozens of demonstrations happening across Brazil and the world: “Get out Bolsonaro. We need the Amazon.” Protesters young and old are amassed on the steps of the city’s main cathedral near narrow pedestrian streets lined with stores. They carry signs: “We can’t eat money.” “Your beef is burning the Amazon.” “Save our forest.” Behind them is a large, inflatable blue globe. They take turns speaking to the crowd.  “I feel like we are not caring for what is really important: our planet,” says Luiza Goldinho Vieira. She’s a young chemistry student, with thick brown glasses and long dark hair. “I feel completely destroyed,” she says. “I feel like a piece of me is burning.” They slam President Bolsonaro for causing the fires and failing to stop them. Bolsonaro has long promised to open up the Amazon for development and end the process of recognizing Indigenous lands. And his rhetoric, plus government cuts to Indigenous and environmental agencies, has spurred farmers, loggers, and land grabbers into action. “They said there’d be no more fines for those who set fire or cut down trees, and that’s what happened here,” says Jesuina Alves Braga. She’s an organic farmer from a Landless Workers’ Movement settlement in the Amazonian state of Acre. In a video shared online, she sits before a patch of scorched earth, black tree trunks on their sides like fallen soldiers. “People were empowered,” she says, “and that’s what they did. They set their fires without worrying if it would impact their neighbor or not.” Farmers often set fires during the dry season to clear pastures. But under Bolsonaro’s administration, they have also used the burns to extend their reach, ripping into conservation areas and Indigenous territories. The August 2019 fires were planned by a group of roughly 70 powerful farmers, land grabbers, and businessmen in the Amazonian state of Para. They set the date for August 10. The idea was to show their support for President Bolsonaro and his policy of weakening environmental inspections and fines. They coordinated over the messaging application WhatsApp, and even took out an ad in a local newspaper urging others to join. They called it “The Day of Fire.” Both Brazil’s environment agency and the justice ministry were alerted of the plans days in advance. They did nothing. On The Day of Fire – August 10, 2019 – Nearly 1,500 separate blazes were registered across the state. Large landowners across the Amazon followed their lead. In the following days and weeks, the fires would catch across the region, a third of them on protected land. More than 4,700 fires burned on Indigenous territories throughout August. “We will resist,” says a young Indigenous leader from Xingu territory in a video shared widely over social media. She wears a crown of tall yellow feathers. Members of her tribe stand behind her. Xingu land is among the top 12 Indigenous territories most impacted by the fires. But they are taking their toll everywhere across the Amazon. Around this time, I got in touch with Andre Karipuna. He’s the young chief of the Karipuna people, a tribe of about only 60 residents in territory in the Brazilian state of Rondonia, one of the places where fires have been blazing out of control. “There’s been smoke everywhere here,” he told me over the phone. He says illegal land grabbers set fire to at least one section of their territory, which had already been illegally cleared. “Since there are so many burns, it’s really hurting our health,” he says. “We’ve been sneezing and coughing. We’ve had trouble breathing because of the smoke.” But it’s not just the fires. Andre has received death threats. And he says he can hear the tractors at night, ripping up their forest just a few miles away from their village. And the Brazilian state, which used to protect these people and their territories, has left them exposed and unguarded. Andre is concerned for himself. He’s concerned for his people. This is what Bolsonaro has unleashed on the Amazon: a literal and metaphorical firestorm – Threats, violence, flames and smoke. And 2019 is only the beginning. Bolsonaro has gutted state regulatory agencies that previously protected the land and the Indigenous peoples living there. He’s pushed to free up mining and natural resource extraction on Indigenous territories, and he’s empowered illegal actors who have invaded Indigenous territories like never before. Thousands of illegal miners, loggers, narcotraffickers, and land grabbers fighting to profit off the destruction of the “lungs of the planet”.  There is a clash of civilizations here – A clash of worlds. And a Wild West attitude that has led to ever increasing threats and violence against Indigenous peoples and their allies. There is no other place in Brazil that better symbolizes the destruction that Bolsonaro has wrought on the country. The Amazon is ground zero, and it is under attack.  This is Brazil on Fire, a podcast about Brazil’s descent toward fascism under President Jair Bolsonaro. This podcast is produced in collaboration with The Real News and NACLA. I’m your host, Michael Fox. I’m a long-time radio reporter and multimedia journalist. I’ve lived in Brazil for years, and I’ve covered Bolsonaro and his government closely. Over these 6 episodes, I’m taking you on a journey to understand the story of Bolsonaro’s rise, and his far-right government that’s set the country ablaze.  Last episode, we looked at Bolsonaro’s relationship with the military and the legacy of the country’s 21-year dictatorship. Today, we take a deep dive into the Amazon, where Bolsonaro’s policies and actions are wreaking havoc on the environment and Indigenous communities.  This is Episode 6: “Amazon up in Smoke: Policies of Destruction”  It’s February 2020. I’m on a flight up to Carajas Airport, in the East of the Amazonian state of Pará. This is the same state where the fires kicked off a few months ago. But this state is massive. Like if Pará alone were a country, it’d be the 23rd largest in the world. It’s almost two and a half times the size of Spain. More than five times the size of the UK. And the difficulty of travel in some areas, because of the lack of paved roads, makes it seem even bigger.  Pará is just one of eight states that make up the Amazon in Northern Brazil. And, you know that image you have of the Amazon: Pristine jungle, endless forest, biodiversity. Well, it’s here, but it’s not what it used to be. And it’s harder to get to. See, huge areas of land here are either now under development or were already developed a long time ago. Take the Carajas Mountains, where I’m flying over now. Out the window, through the clouds, I catch glimpses of lush green jungle hillsides that seem formidable and endless. And then…. The landscape changes – It turns brown and gutted. Deep cuts scar the countryside as if slashed with a knife. It’s like a huge endless construction zone has been carved out of the hillsides, one after the other. It’s Carajas, the largest open-pit iron ore mine in the world. It’s run by the Brazilian multinational company Vale, one of the biggest mining corporations on the planet.  See, 60 years ago, this area was pristine, impenetrable jungle. Then, in 1967, representatives of US Steel – You know, the billion-dollar US multinational corporation – Well, they discovered rich mineral deposits here. Within a few years, US Steel and Vale formed a joint venture and began extracting millions of tons of iron ore per year.  The mine transformed not just the local environment, but the region. In 1976, the neighboring town of Parauapebas was founded in an area that was previously jungle. Today, with a population of around 200,000, it looks like any other Brazilian city. That’s how development works here. Resource extraction requires the building of roads, infrastructure, homes. Newcomers arrive in search of work, pushing Indigenous people off their land. The forest is either chopped up and sold as hardwood, or just cleared and burned to make room for the endless advance of so-called progress.  At least, that’s how it’s worked historically, especially under Brazil’s military dictatorship. See, it’s not by accident that the Carajas mine was founded then. We talked a lot about the dictatorship in the last episode, but there is something we didn’t get to that’s really important for understanding what’s happening in the Amazon today.  The military was all in for the development of the Amazon, particularly after they discovered the iron ore deposits at Carajas. They built roads and highways. They presented the Amazon as an enemy to be conquered, and encouraged thousands of Brazilians to uproot their families and move to the rainforest. They prom sed subsidies and financial incentives for cattle ranchers. One piece of propaganda in a 1972 newspaper read: “Drive your herd to the biggest pasture in the world.” The Amazon. They built rail lines, hydroelectric dams, and the TransAmazon highway. The Indigenous people who were there… They either needed to get on board or get out of the way. And that is what happened.  According to Brazil’s National Truth Commission, state forces under the dictatorship killed more than 8,000 Indigenous people in their push to clear the jungle and open up the region. Dozens of  tribes were impacted. Some were forcibly relocated as many as five times. Others were utterly decimated. Villages massacred, pushed to the brink of extermination. All in the name of the regime’s National Integration Plan. And the dictatorship’s Indigenous policies of integrate or get out have been reinstituted by president Bolsonaro, albeit under the guise of a more friendly approach.  “The Indians have changed. They are evolving,” Bolsonaro said during a Facebook livestream in 2020. “More and more they are human beings just like us. So we have to help them integrate into society and make them truly the owners of their Indigenous land.” Bolsonaro has been increasingly vocal about his take on the plight of Brazil’s Native peoples. On numerous occasions over the last two years, he’s met with Indigenous communities that are receptive to his call to open up the region to development. “The Indians want to work,” he told cameras last August, while wearing a colorful feather headdress and a bow and arrow slung over his shoulder. “They want the Internet. They want to pay taxes, and we should give them freedom to work, to produce, to integrate themselves into Brazilian society,” he said. That position is 180 degrees from the state policies that guided Brazil’s Indigenous affairs for the last three decades, and which defended Indigenous peoples’ right to remain on their land. A right protected in the 1988 Constitution. Each year, state institutions slowly demarcated and officially recognized more Indigenous territories. But on the campaign trail in 2018, Bolsonaro promised “Not one centimeter more for Indigenous lands…” It was a powerful sign. And one that set the tone for illegal actors hoping to roll back Indigenous rights and land protections in the Amazon.  “The Bolsonaro government gave people a sense of carte blanche,” says anthropologist Barbara Arisi. “Carte blanche to deforest. Carte blanche to attack. The people felt this moment of ‘now is the time to take what we want.’ This idea that a developed Brazil is a Brazil where the forest and the Indians are gone. It’s not true. In fact, it’s the exact opposite of the ecological discourse of everyone else… But that’s what they’ve pushed for,” she says. Land invasions of Indigenous territories spiked 150% just in the first months of Bolsonaro’s presidency. Meanwhile, the president began to push for legislation that would open their territories to resource extraction. [Jair Bolsonaro speaking] That’s him in early 2020, presenting a new bill to congress aimed at opening access to new mining concessions in Indigenous areas. Rodrigo Toniol is an anthropologist at the Federal University of Rio de Janeiro. Rodrigo Toniol:  For Bolsonaro, the Amazon is a great treasure chest, and it needs to be exploited, but there are Indians in the way. So Bolsonaro’s working to find a means to destroy the protections that the Indigenous peoples have acquired. Michael Fox:  Indigenous peoples have been one of the greatest opponents to Bolsonaro. They have marched, protested, rallied, and held ceremonies against his policies. They’ve descended on Brasilia by the thousands several times during Bolsonaro’s government, setting up week-long encampments to push back on the president, or to pressure for their rights during important legislative debates. The sounds of their dancing and chants have echoed across the streets outside Congress and the Supreme Court, which have repeatedly debated key issues affecting Indigenous peoples, their territories, and their futures.  Essential to it all, is the defense of their territories. The demarcation of their land. This is important. Today Brazil has almost 500 officially recognized Indigenous territories. Another 700 are at different stages of the demarcation process. And that distinction makes a big difference. That’s what I learned from Manuel Verona. He’s an 82-year-old former chief of the Juruna people, who have lived on their land along the banks of Xingu river for generations. Today, just over two dozen families live here on Paquiçamba Territory, officially recognized as Indigenous land since 2014. It’s on the outskirts of the city of Altamira. I traveled up there from Carajas by us; 14 hours.  Here, the Belo Monte dam has wreaked havoc. Since the dam went online in 2016, it’s cut as much as 85% of the river flow downstream, drying up spawning areas and suffocating the river, killing tons of fish. Now, these same communities are under threat from a Canadian firm known as Belo Sun hoping to mine for gold upstream.  I’m on Manuel’s front porch. The sound of the insects hums through the damp drizzly air. He wears a green shirt. Checkered shorts. Short, unkept hair. “I was born and raised, right here,” he says, defiantly. We talk about many things that evening, his family members sitting around listening. But one thing in particular catches my attention. He tells me how a few years ago, some guys from town came and told them that the land was theirs. That they were there to take it back. But because of the recognition and demarcation process, Manuel had an official document from the Indigenous agency, Funai. It said the land belonged to the Juruna people. The guys eventually left.  But they weren’t the only ones. Attempts to steal Indigenous lands go back as far as the arrival of the first Europeans. In colonial times, powerful individuals would fabricate documents from official institutions that falsely showed they were the owners. Excuses to push people from their lands. It’s the same story today. But land invaders, empowered by president Bolsonaro, have been even more active, and more violent.  “Bolsonaro’s goal is to do away with the Juruna people,” says Manuel Verona’s grandson, Cleyson, who is now the chief of the tribe. “I believe he wants to do away with the Indigenous peoples altogether,” he says.  And amid the pandemic, Indigenous peoples’ resistance would be put to the test. That, in a minute. In March 2020, I was headed to visit the territory of the Munduruku. They’re an Indigenous people who live on the banks of the Tapajos River. Their land has been invaded by illegal miners. I was planning to spend a week traveling by boat across their land, visiting one village and the next. And then COVID hit. The trip was off. As best as they could, Indigenous peoples tried to shelter themselves from the outside to protect their communities from the virus. Isolated Indigenous groups were even more susceptible to coronavirus than the rest of the population. Bolsonaro’s government largely turned its back, claiming the disease wasn’t a big deal. Indigenous peoples were left to fend for themselves. Many communities set up their own health barricades, where they regulated who entered their lands, checked temperatures, and sprayed people down with disinfectant, if they could get it. Even so, COVID found its way through. The disease spread. The death count rose. Historic elders and leaders died: Chiefs Aritana Yawalapiti and Paulinho Paiakan, Macuxi artist and healer Vovó Bernaldina. And so many more. “We can’t bring these people back to reshare their knowledge that they acquired over their long lives,” Nildo Fontes, a member of the Tucano tribe in the upper Rio Negro told me. He lost his uncle, a tribal elder, to coronavirus. “Our biggest concern now,” he says, “is to monitor our leaders with traditional knowledge who are left and to find a way to record their learning in order to carry on.” Some peoples, like the Kayapó, protested, blocking a major highway, demanding attention from the government. Bolsonaro’s government would eventually send in the military. In one highly publicized case, a detachment of the Armed Forces flew to Yanomami communities and handed out boxes of the unproven drug hydroxychloroquine. When Brazil’s Senate investigated Bolsonaro’s handling of the pandemic, they accused the president of crimes against humanity, for failing to protect the country’s Indigenous populations. Over 1300 Indigenous people have died from COVID-19 according to the country’s largest Indigenous group, APIB. According to government figures, during the first four months of the pandemic in Brazil, the state Indigenous agency FUNAI spent less than $2.00 for each Indigenous person on efforts to fight COVID-19 in their communities. Meanwhile, the government actually decreased spending on Indigenous health during the pandemic.The lack of attention, however, was not only in the area of health. During the pandemic, Bolsonaro rolled back funding for protecting conservation areas and Indigenous territories and peoples. He further gutted the environment and Indigenous agencies. “The government has been completely absent,” Dinaman Tuxa, one of the leaders of APIB told me. “It’s the result of a political project focused on undoing Indigenous policies across the country and applying structural and institutional racism against Indigenous peoples,.” he said.  Illegal and armed actors took advantage. They pushed onto protected and Native territories like never before. No where was this felt more deeply than in Yanomami territory – The country’s largest Indigenous land, near the border with Venezuela. Today, there are roughly 20,000 illegal miners just on Yanomami land.  Their operations grew by 30% during the first year of the pandemic, with nearly 6,000 acres of land deforested. Images of mining areas there show a wasteland of cleared forest, brown dirt, and contaminated pits. The miners have brought disease, drugs, alcohol and other vices. And they have been violent.  It’s May 2021. A group of Indigenous Yanomami women in brightly colored shirts line a hillside in the Amazonian village of Palimiu. Suddenly, shots are fired from a speedboat rushing by on the river below. The women run, carrying babies in their arms, and dragging small children by their sides. They race off camera toward the trees.  According to reports, three people were injured. Two children died, drowned during the confusion. Out of fear, many slept in the forest, where some caught pneumonia. Attacks like this, with gunshots and tear gas bombs, continued against the community for days. “This is a threat to the life of the forest, a threat to the lives of people who live in their villages, the children, women, and elderly,” Dario Kopenawa, vice-president of the Hutukara association, told me. His group represents more than 370 Yanomami and Yekuana communities. “Mining kills,” he said. “It murders people. It destroys the life of Mother Earth.” Less than a month later, Indigenous peoples from across Brazil would take their indignation to the streets of Brasilia. June 2021. Almost a thousand members of Indigenous tribes across Brazil are camped in the country’s capital. They call it an occupation in defense of their lives. It comes just weeks after the violent attacks against the Yanonmami and amid legislative battles and a court case that could have lasting repercussions for Native peoples. “There are Indigenous peoples from across Brazil here,” said Ricardo Pataxó, a young Pataxó leader wearing a face mask and a beaded necklace. “We are resisting, and more Indigenous people are on their way.” They’ve shut down streets, protested in front of the Mining Ministry, and marched to the start of the Copa America soccer tournament. Their actions have been shared in countless videos over social media. This is the most active Indigenous mobilization in Brazil since the start of the pandemic, and it’s being organized by the country’s largest Indigenous organizations. In a video, rows of Indigenous men wearing armbands and feathered headdresses chant behind a metal fence. Before them, a pack of policemen block the locked entrance into a Congressional building. Inside, Indigenous representative Joênia Wapixana is fighting a bill that Bolsonaro’s allies are trying to push through Congress. If approved, the legislation would weaken Indigenous people’s right to their land and open the door to extractive industries. The thing is, Bolsonaro has powerful allies in Congress. I mean, his Agriculture Minister Tereza Cristina is the former head of Brazil’s Big Ag caucus. And that group composes literally half of Congressional representatives. They are powerful. Pro-agribusiness. Pro-development. And largely against rights for Indigenous peoples and small farmers.  Remember, large-scale agricultural production represents a substantial chunk of Brazil’s GDP. The country is the world’s number one producer and exporter of coffee, sugar, beef, poultry, orange juice, and tobacco, and number two in soybeans, after the US. The production of these products for export – Especially cattle and soy – Is a major factor driving the deforestation of the Amazon rainforest.  According to a recent report, powerful US banks, businesses, and mutual funds have invested billions of US dollars in companies financing Brazil’s agribusiness lobby. Among them are JP Morgan Chase, Bank of America, and BlackRock.  And agribusiness is responsible for the lion’s share of Brazilian deforestation –  97% of it in 2021. I saw the impact on the land first hand.  Up and down the state of Pará, cattle ranches line the major highways, stretching across the countryside off into the horizon. Only a few decades ago, these areas were dense jungle and inhabited by Indigenous communities. In the Amazon port city of Santarem, food and ag giant Cargill owns and operates a huge grain terminal that stretches out into the river, so it can easily load endless tons of soy onto the line of ships waiting to carry it into the world. Cargill alone exports more than 10 million tons of soy from Brazil each year. Soy fields pour out over the surrounding countryside. I mention all this because these powerful interests and multinational corporations have backed Bolsonaro’s push to increasingly open up the region to development.  But where some see growth, others see devastation. I don’t know if you heard, but the fires are back. This year, they got almost no media attention at all. But they were worse than last year, 2020, 2019, and any other point in the last decade.  Ane Alencar:  In the past three years, we reached another level of the deforestation, which is above ten thousand square kilometers a year. We haven’t seen that since 2008. Michael Fox:  That’s Ane Alencar, science director at the Brazilian Amazon Environmental Research Institute. Ane Alencar:  So 51% of all deforestation in the Brazilian Amazon biome has been happening in the past three years on public lands, which means that people are taking advantage of what is happening in terms of lower enforcement to invade and to grab public lands. Because this apparently became a lower-risk activity. Michael Fox:  Public lands. That means conservation areas, protected reserves. Indigenous territories. And this is not normal for Brazil. Neither is the organized crime connection – Ane says narcotraffickers and powerful illegal groups are also getting involved, investing a lot of resources to clear the forest for future land speculation, and, above all, cattle ranching. Ane Alencar:  People are putting machinery like tractors to pile up the logs and make this very clean deforestation operation, and when you set a fire you just completely clean the landscape. And this is in the middle of nowhere… So it means that people did a lot of investment to have this, and it’s on public land. It’s illegal. So they must be very confident that nothing’s going to happen for them to risk all this investment. Michael Fox:  These are the signs of the destruction wrought by Bolsonaro’s government. See, like I mentioned at the beginning of the episode, fires do not burn naturally in the Amazon. First, the forest is clear cut. In other words, every acre burned is an acre deforested. An acre taken out of the jungle and razed to make way for cattle, or soy, roads, or subdivisions, all pushed by Bolsonaro’s rhetoric and policies of destruction. That term: policies of destruction. It’s something I’ve heard a lot over the last four years. It’s come up in interview after interview. Many describe the actions of Bolsonaro’s government as policies of death. And the nearly 700,000 dead from COVID-19 accents this point perfectly. Policies of destruction. “Brazil is being destroyed, land, air, and sea,” Professor Emiliano Jose tells me. Remember, he’s the retired communications professor who we heard from in the last episode. He’s the one who fought the dictatorship in his youth. Emiliano Jose da Silva Filho:  It’s the complete destruction of the country. Bolsonaro is handing the country over to foreign interests. Handing out our oil reserves. Destroying the Amazon. Destroying the country. Michael Fox:  This isn’t just about the havoc that Bolsonaro is unleashing across Brazil. It’s also about how his government has gutted state agencies and state-owned businesses to do so. Auctioning off portions of the country: infrastructure, airports, natural resources, state assets. As Emiliano Jose said last episode: It’s about the unraveling of the country’s democracy. This is a key theme of fascism. The destruction of liberal democracy and the objectification of humanity, which often leads to violence. Violence – It’s a constant undercurrent in the Amazon. Last year, dozens of Indigenous people were killed there in conflicts over their territory. This year, the killing of two Indigenous allies, anthropologist Bruno Perreira and British reporter Dom Phillips, captured headlines for weeks. Dehumanization turned violence is also what we saw in the leadup to the 2018 elections – The killing of Moa de Katende, Charlione Lessa, Marielle Franco. And it’s what we’re seeing now, with numerous attacks and even killings in recent months, carried out mostly by Bolsonaro supporters against their political opponents. According to a recent poll, two thirds of Brazilians say they fear being attacked because of their political preferences. Two thirds. Here’s historian Federico Finchelstein, our go-to expert on fascism. Federico Finchelstein:  One of the key elements of fascism is this kind of radical dehumanization. This process of regarding subjects as objects not only applies to people, but also nature, and animals, and other forms of life. And I think this is one of the most horrible legacies of Bolsonaro. The attack not only on people – Let’s not forget his lies about COVID and his anti-vaccination drive – But also his lack of regard for nature as another dimension of his lack of regard for life, and treating everything as an object, and an object of manipulation. Michael Fox:  As we talked about back in the introduction to the podcast series, neither Federico nor I are saying that Brazil is a fascist state. It is not. It’s not a dictatorship. It is still a democracy, though with troubling backslides. Bolsonaro has pushed the country toward fascism in a dangerous way. And the upcoming presidential elections are really important.  Federico Finchelstein:  “This is not a mere Brazilian election or a mere election as such, but rather one that defines, I think, the future of democracy in our region, in Latin America and beyond. Whether this Trumpist recipe still works on or it continues its patterns of failures starting with January 6 in the US. And then we can learn a lot about if, as I hope, Brazilian society stays firm against these threats, that basically there is a way out of this authoritarianism, there is a way out of this manipulation and lies. And the way out is through democratic means. Michael Fox:  That is what’s at stake in the coming weeks in Brazil. The future of not just Brazil, but the region, including the United States. As the title for the introduction to the podcast series says: Democracy and Dictatorship. One model is devastation. The other is life. And tackling Amazon deforestation is not impossible. It’s happened before.  When Luiz Inácio Lula da Silva came to power twenty years ago, deforestation rates in the Amazon were through the roof. He enacted a series of landmark measures to curb deforestation.  New strict regulations, inspections, land registries, wood and meat tracking systems. These efforts were a huge success. They cut deforestation rates in half within two years. Brazil even hit its UN 2020 goal of reducing greenhouse gas emissions from deforestation ahead of schedule. Of course, that was before Bolsonaro unraveled it all. I want to bring one person back here. That’s Andre Karipuna. The young leader of the Karipuna people, who I introduced at the beginning of the episode. I spoke with him recently over the phone while he was in Brasilia, lobbying politicians and foreign embassies to pressure the Brazilian government to protect their people. He says over the last four years, things have only gotten worse. Andre Karipuna:  Everything has been undone. Rights for access to health. Protection of our territory. Education for Indigenous peoples. Michael Fox:  And he fears that the relentless attack could still get worse. He says that laws now under debate in Brasilia could strip Indigenous peoples of their rights across the country. When I first spoke with him four years ago, you could look on Google Earth and see his people’s territory. It was this large green block, surrounded by a checkerboard of shades of yellow – The roads, homes and farmlands that had been carved out of the jungle in recent decades. But even at the time, if you zoomed in and looked closely, you could see lines cutting into their land like little daggers.  Well, over the years, I’ve kept an eye on his territory from Google Earth. But lately, I’ve been kind of afraid to look. Where there were small slivers of roads before, today, whole parcels of the Karipuna territory are cleared, as if their land title isn’t worth the paper it’s printed on. Long lines dig into their land like thin bony fingers reaching into the very essence of their existence. They run North and South, East and West, following the pattern of the roads that are supposed to stop along the edge of their territory. The perimeter of their land is too large for the Karipuna people to patrol it all. And the state has largely abandoned them. So they’re doing their best to hold on.  This is the true cost of Bolsonaro and his government that has set the country ablaze. This is Brazil on Fire. That is all for this podcast series. I hope you’ve enjoyed it. I know I have. It’s been a labor of love for many years, and I am overjoyed to put it out into the world.  Now, although the first six episodes are over, the series is not. In the coming weeks, I hope to be bringing you some exclusive interviews on some of the topics I covered and many more that really deserve greater attention, like education and the big 2019 student rallies. Or the deep ties between Trump, Bannon, Bolsonaro, his kids, and so much more. I’ll also be updating you on the Brazilian elections in the coming weeks. The first round is this Sunday, October 2. So stay tuned and keep your eyes on the feed.  Before I go, I want to thank everyone who donated to my Kickstarter campaign for this podcast nearly two years ago. It was crucial to getting things off the ground, and I am forever grateful. In particular, I really want to thank the following people: Eric Kinzler, Jim Chomas, Tom Rall, Michael Locker, Supapan Kantithammikorn, Patti Simbulan (my old music teacher), Ryan Harvey, Dot Goodman, Greg Wilpert, Kayo de Oliveira, T.M. Scruggs, Bob Friedman, Lee Wilkinson, Kevin Danaher, Scott Bayliss, Sam Dodge, Gabriel Eiras Villa, Georgia Malki, Richard Black, Jacob Wascalus, Jordan Klein, Burke Stansbury, Ben Dangl, Marc Becker, Chris Michael. Diane Murphy. And in particular, a huge shout out to Judy Hughes, my parents Michael and Maryann Fox, and the Sawyers. Y’all are amazing. Thank you also to audio producers Zoe Sullivan and Jill Replogle for their guidance and support. And also thanks to PRI’s The World, for whom I first reported MANY of these stories that have appeared over these six episodes. And for whom I will be reporting from Brazil in the coming weeks. Finally, of course, many, many thanks to the teams at both The Real News and NACLA. It has been a pleasure. This is Brazil on Fire. Co-produced in collaboration with The Real News and NACLA. This series was edited by Heather Gies. Sound design by Gustavo Türck, at Coletivo Catarse. Theme music was by my band Monte Perdido. The other music was licensed from Blue Dot Sessions. '

text_dict = {'news' : [ text_cnn_news ] }
dataset_to_predict = pd.DataFrame(text_dict)
print(dataset_to_predict.iloc[:,:])

                                                news
0  CNN news The Amazon is a place that both Brazi...


In [21]:
X_to_predict = cv.fit_transform(dataset_to_predict['news']).toarray()

In [22]:
classifier.predict(X_to_predict)

array(['Photography'], dtype='<U31')

## The prediction is not right. We expect news, but got other. However we need to filter that text, removing what is not important

In [23]:
import nltk
from nltk import tokenize
from nltk.stem import PorterStemmer
import unicodedata

words_banned = nltk.corpus.stopwords.words("english")
token_punct = tokenize.WordPunctTokenizer()
stemmer = PorterStemmer()

# remove letters with punct and morphological variants
aux_pos = 0
for word_banned in words_banned:
    word_banned = unicodedata.normalize("NFD", word_banned)
    word_banned = word_banned.encode("ascii", "ignore")
    word_banned = word_banned.decode("utf-8")
    
    words_banned[aux_pos] = stemmer.stem(word_banned)
    aux_pos += 1

# remove possible duplicated words
words_banned = list(dict.fromkeys(words_banned))

# remove banned words
dataset_position = 0
for text in dataset_to_predict.news:
    phrase = list()
    # tokenize the text
    phrase_tokenized = token_punct.tokenize(text)
    stemmer = PorterStemmer()
    # treat the words
    for word in phrase_tokenized:
        # remove words with punct
        word = unicodedata.normalize("NFD", word)
        word = word.encode("ascii", "ignore")
        word = word.decode("utf-8")

        # remove capital letters
        word = word.lower()
        
        # remove morphological variants
        word = stemmer.stem(word)
        
        # if the word is not banned, add to the phrase
        if word not in words_banned:
            phrase.append(word)
            
    # append as one phrase
    phrase_processed = ' '.join(phrase)
    
    # save in the dataframe
    dataset_to_predict.loc[dataset_position,'news'] = phrase_processed
    
    # increment the row position
    dataset_position += 1

## After process the text:

In [24]:
print(dataset_to_predict.iloc[0,0])

cnn news amazon place brazil  former militari regim presid jair bolsonaro eye dream develop , look take advantag bounti resourc .  place bolsonaro  deconstruct state institut wreak havoc . illeg arm actor push formerli protect area plunder land make quick profit . indigen peopl constantli threat , whether stare barrel gun , fire , covid , territori way life attack  invad slowli eat away forest given life gener . bolsonaro empow behind devast . place brazil better symbol destruct bolsonaro wrought countri . amazon attack . fire visibl sign . brazil fire , podcast brazil  descent toward fascism presid jair bolsonaro . six episod look bolsonaro  far - right govern set countri ablaz , unit state help .  visit birthplac brazilian nazism , evangel church , indigen villag amazon . host latin america - base journalist michael fox . podcast produc partnership real news network nacla . edit heather gie . sound design gustavo turck . theme music mont perdido . transcript michael fox : sao paulo ,

## Predict the site content again

In [25]:
X_to_predict = cv.fit_transform(dataset_to_predict['news']).toarray()
classifier.predict(X_to_predict)

array(['News'], dtype='<U31')

## Now we got the right category :)